diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 505fe77f89..309524886e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -34,11 +34,11 @@ This is a template helping you to create an issue which can be processed as quic - [ ] I report the issue, it's not a question - [ ] I checked the problem with documentation, FAQ, open issues, - forum.opencv.org, Stack Overflow, etc and have not found solution + forum.opencv.org, Stack Overflow, etc and have not found any solution - - [ ] I updated to latest OpenCV version and the issue is still there + - [ ] I updated to the latest OpenCV version and the issue is still there - [ ] There is reproducer code and related data files: videos, images, onnx, etc diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 0454d1aed8..5e2e911cc8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,9 +3,9 @@ See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. -- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or other license that is incompatible with OpenCV -- [ ] The PR is proposed to proper branch -- [ ] There is reference to original bug report and related work +- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV +- [ ] The PR is proposed to the proper branch +- [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c8c1a0172..8ec7b65030 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,7 +244,7 @@ OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/Mac)" ON OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON VISIBLE_IF IOS VERIFY HAVE_CAP_IOS) -OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" ON +OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" (NOT CV_DISABLE_OPTIMIZATION) VISIBLE_IF (ARM OR AARCH64) AND NOT IOS) OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON VISIBLE_IF ANDROID @@ -291,12 +291,10 @@ OCV_OPTION(WITH_HALIDE "Include Halide support" OFF OCV_OPTION(WITH_VULKAN "Include Vulkan support" OFF VISIBLE_IF TRUE VERIFY HAVE_VULKAN) -OCV_OPTION(WITH_INF_ENGINE "Include Intel Inference Engine support" OFF +# replacement for deprecated options: WITH_INF_ENGINE, WITH_NGRAPH +OCV_OPTION(WITH_OPENVINO "Include Intel OpenVINO toolkit support" (WITH_INF_ENGINE) VISIBLE_IF TRUE - VERIFY INF_ENGINE_TARGET) -OCV_OPTION(WITH_NGRAPH "Include nGraph support" WITH_INF_ENGINE - VISIBLE_IF TRUE - VERIFY TARGET ngraph::ngraph) + VERIFY TARGET ocv.3rdparty.openvino) OCV_OPTION(WITH_WEBNN "Include WebNN support" OFF VISIBLE_IF TRUE VERIFY HAVE_WEBNN) @@ -809,7 +807,7 @@ if(WITH_WEBNN) endif() # --- Inference Engine --- -if(WITH_INF_ENGINE) +if(WITH_INF_ENGINE OR WITH_OPENVINO) include(cmake/OpenCVDetectInferenceEngine.cmake) endif() @@ -1548,55 +1546,61 @@ if(WITH_HALIDE OR HAVE_HALIDE) status(" Halide:" HAVE_HALIDE THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO) endif() -if(WITH_INF_ENGINE OR INF_ENGINE_TARGET) - if(INF_ENGINE_TARGET) - list(GET INF_ENGINE_TARGET 0 ie_target) - set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})") - ocv_get_imported_target(ie_target "${ie_target}") - get_target_property(_lib 
${ie_target} IMPORTED_LOCATION) - get_target_property(_lib_imp_rel ${ie_target} IMPORTED_IMPLIB_RELEASE) - get_target_property(_lib_imp_dbg ${ie_target} IMPORTED_IMPLIB_DEBUG) - get_target_property(_lib_rel ${ie_target} IMPORTED_LOCATION_RELEASE) - get_target_property(_lib_dbg ${ie_target} IMPORTED_LOCATION_DEBUG) - ocv_build_features_string(_lib - IF _lib THEN "${_lib}" - IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}" - IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}" - IF _lib_rel THEN "${_lib_rel}" - IF _lib_dbg THEN "${_lib_dbg}" - ELSE "unknown" - ) - get_target_property(_inc ${ie_target} INTERFACE_INCLUDE_DIRECTORIES) - status(" Inference Engine:" "${__msg}") - status(" * libs:" "${_lib}") - status(" * includes:" "${_inc}") - else() - status(" Inference Engine:" "NO") +if(HAVE_OPENVINO + OR (WITH_OPENVINO AND NOT WITH_INF_ENGINE AND NOT INF_ENGINE_TARGET) +) + status(" OpenVINO:" TARGET openvino::runtime THEN "YES (${OpenVINO_VERSION})" ELSE "NO") +else() + if(WITH_INF_ENGINE OR INF_ENGINE_TARGET) + if(INF_ENGINE_TARGET) + list(GET INF_ENGINE_TARGET 0 ie_target) + set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})") + ocv_get_imported_target(ie_target "${ie_target}") + get_target_property(_lib ${ie_target} IMPORTED_LOCATION) + get_target_property(_lib_imp_rel ${ie_target} IMPORTED_IMPLIB_RELEASE) + get_target_property(_lib_imp_dbg ${ie_target} IMPORTED_IMPLIB_DEBUG) + get_target_property(_lib_rel ${ie_target} IMPORTED_LOCATION_RELEASE) + get_target_property(_lib_dbg ${ie_target} IMPORTED_LOCATION_DEBUG) + ocv_build_features_string(_lib + IF _lib THEN "${_lib}" + IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}" + IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}" + IF _lib_rel THEN "${_lib_rel}" + IF _lib_dbg THEN "${_lib_dbg}" + ELSE "unknown" + ) + get_target_property(_inc ${ie_target} INTERFACE_INCLUDE_DIRECTORIES) + status(" Inference Engine:" "${__msg}") + status(" * libs:" 
"${_lib}") + status(" * includes:" "${_inc}") + else() + status(" Inference Engine:" "NO") + endif() endif() -endif() -if(WITH_NGRAPH OR HAVE_NGRAPH) - if(HAVE_NGRAPH) - ocv_get_imported_target(__target ngraph::ngraph) - set(__msg "YES (${ngraph_VERSION})") - get_target_property(_lib ${__target} IMPORTED_LOCATION) - get_target_property(_lib_imp_rel ${__target} IMPORTED_IMPLIB_RELEASE) - get_target_property(_lib_imp_dbg ${__target} IMPORTED_IMPLIB_DEBUG) - get_target_property(_lib_rel ${__target} IMPORTED_LOCATION_RELEASE) - get_target_property(_lib_dbg ${__target} IMPORTED_LOCATION_DEBUG) - ocv_build_features_string(_lib - IF _lib THEN "${_lib}" - IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}" - IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}" - IF _lib_rel THEN "${_lib_rel}" - IF _lib_dbg THEN "${_lib_dbg}" - ELSE "unknown" - ) - get_target_property(_inc ${__target} INTERFACE_INCLUDE_DIRECTORIES) - status(" nGraph:" "${__msg}") - status(" * libs:" "${_lib}") - status(" * includes:" "${_inc}") - else() - status(" nGraph:" "NO") + if(WITH_NGRAPH OR HAVE_NGRAPH) + if(HAVE_NGRAPH) + ocv_get_imported_target(__target ngraph::ngraph) + set(__msg "YES (${ngraph_VERSION})") + get_target_property(_lib ${__target} IMPORTED_LOCATION) + get_target_property(_lib_imp_rel ${__target} IMPORTED_IMPLIB_RELEASE) + get_target_property(_lib_imp_dbg ${__target} IMPORTED_IMPLIB_DEBUG) + get_target_property(_lib_rel ${__target} IMPORTED_LOCATION_RELEASE) + get_target_property(_lib_dbg ${__target} IMPORTED_LOCATION_DEBUG) + ocv_build_features_string(_lib + IF _lib THEN "${_lib}" + IF _lib_imp_rel AND _lib_imp_dbg THEN "${_lib_imp_rel} / ${_lib_imp_dbg}" + IF _lib_rel AND _lib_dbg THEN "${_lib_rel} / ${_lib_dbg}" + IF _lib_rel THEN "${_lib_rel}" + IF _lib_dbg THEN "${_lib_dbg}" + ELSE "unknown" + ) + get_target_property(_inc ${__target} INTERFACE_INCLUDE_DIRECTORIES) + status(" nGraph:" "${__msg}") + status(" * libs:" "${_lib}") + status(" * includes:" 
"${_inc}") + else() + status(" nGraph:" "NO") + endif() endif() endif() diff --git a/apps/opencv_stitching_tool/opencv_stitching/blender.py b/apps/opencv_stitching_tool/opencv_stitching/blender.py index 2b6e74a810..5ee2a717f1 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/blender.py +++ b/apps/opencv_stitching_tool/opencv_stitching/blender.py @@ -26,8 +26,8 @@ class Blender: elif self.blender_type == "multiband": self.blender = cv.detail_MultiBandBlender() - self.blender.setNumBands((np.log(blend_width) / - np.log(2.) - 1.).astype(np.int)) + self.blender.setNumBands(int((np.log(blend_width) / + np.log(2.) - 1.))) elif self.blender_type == "feather": self.blender = cv.detail_FeatherBlender() @@ -45,4 +45,12 @@ class Blender: result_mask = None result, result_mask = self.blender.blend(result, result_mask) result = cv.convertScaleAbs(result) - return result + return result, result_mask + + @classmethod + def create_panorama(cls, imgs, masks, corners, sizes): + blender = cls("no") + blender.prepare(corners, sizes) + for img, mask, corner in zip(imgs, masks, corners): + blender.feed(img, mask, corner) + return blender.blend() diff --git a/apps/opencv_stitching_tool/opencv_stitching/cropper.py b/apps/opencv_stitching_tool/opencv_stitching/cropper.py new file mode 100644 index 0000000000..243a6dc7b0 --- /dev/null +++ b/apps/opencv_stitching_tool/opencv_stitching/cropper.py @@ -0,0 +1,149 @@ +from collections import namedtuple +import cv2 as cv + +from .blender import Blender +from .stitching_error import StitchingError + + +class Rectangle(namedtuple('Rectangle', 'x y width height')): + __slots__ = () + + @property + def area(self): + return self.width * self.height + + @property + def corner(self): + return (self.x, self.y) + + @property + def size(self): + return (self.width, self.height) + + @property + def x2(self): + return self.x + self.width + + @property + def y2(self): + return self.y + self.height + + def times(self, x): + return 
Rectangle(*(int(round(i*x)) for i in self)) + + def draw_on(self, img, color=(0, 0, 255), size=1): + if len(img.shape) == 2: + img = cv.cvtColor(img, cv.COLOR_GRAY2RGB) + start_point = (self.x, self.y) + end_point = (self.x2-1, self.y2-1) + cv.rectangle(img, start_point, end_point, color, size) + return img + + +class Cropper: + + DEFAULT_CROP = False + + def __init__(self, crop=DEFAULT_CROP): + self.do_crop = crop + self.overlapping_rectangles = [] + self.cropping_rectangles = [] + + def prepare(self, imgs, masks, corners, sizes): + if self.do_crop: + mask = self.estimate_panorama_mask(imgs, masks, corners, sizes) + self.compile_numba_functionality() + lir = self.estimate_largest_interior_rectangle(mask) + corners = self.get_zero_center_corners(corners) + rectangles = self.get_rectangles(corners, sizes) + self.overlapping_rectangles = self.get_overlaps( + rectangles, lir) + self.intersection_rectangles = self.get_intersections( + rectangles, self.overlapping_rectangles) + + def crop_images(self, imgs, aspect=1): + for idx, img in enumerate(imgs): + yield self.crop_img(img, idx, aspect) + + def crop_img(self, img, idx, aspect=1): + if self.do_crop: + intersection_rect = self.intersection_rectangles[idx] + scaled_intersection_rect = intersection_rect.times(aspect) + cropped_img = self.crop_rectangle(img, scaled_intersection_rect) + return cropped_img + return img + + def crop_rois(self, corners, sizes, aspect=1): + if self.do_crop: + scaled_overlaps = \ + [r.times(aspect) for r in self.overlapping_rectangles] + cropped_corners = [r.corner for r in scaled_overlaps] + cropped_corners = self.get_zero_center_corners(cropped_corners) + cropped_sizes = [r.size for r in scaled_overlaps] + return cropped_corners, cropped_sizes + return corners, sizes + + @staticmethod + def estimate_panorama_mask(imgs, masks, corners, sizes): + _, mask = Blender.create_panorama(imgs, masks, corners, sizes) + return mask + + def compile_numba_functionality(self): + # numba functionality is 
only imported if cropping + # is explicitely desired + try: + import numba + except ModuleNotFoundError: + raise StitchingError("Numba is needed for cropping but not installed") + from .largest_interior_rectangle import largest_interior_rectangle + self.largest_interior_rectangle = largest_interior_rectangle + + def estimate_largest_interior_rectangle(self, mask): + lir = self.largest_interior_rectangle(mask) + lir = Rectangle(*lir) + return lir + + @staticmethod + def get_zero_center_corners(corners): + min_corner_x = min([corner[0] for corner in corners]) + min_corner_y = min([corner[1] for corner in corners]) + return [(x - min_corner_x, y - min_corner_y) for x, y in corners] + + @staticmethod + def get_rectangles(corners, sizes): + rectangles = [] + for corner, size in zip(corners, sizes): + rectangle = Rectangle(*corner, *size) + rectangles.append(rectangle) + return rectangles + + @staticmethod + def get_overlaps(rectangles, lir): + return [Cropper.get_overlap(r, lir) for r in rectangles] + + @staticmethod + def get_overlap(rectangle1, rectangle2): + x1 = max(rectangle1.x, rectangle2.x) + y1 = max(rectangle1.y, rectangle2.y) + x2 = min(rectangle1.x2, rectangle2.x2) + y2 = min(rectangle1.y2, rectangle2.y2) + if x2 < x1 or y2 < y1: + raise StitchingError("Rectangles do not overlap!") + return Rectangle(x1, y1, x2-x1, y2-y1) + + @staticmethod + def get_intersections(rectangles, overlapping_rectangles): + return [Cropper.get_intersection(r, overlap_r) for r, overlap_r + in zip(rectangles, overlapping_rectangles)] + + @staticmethod + def get_intersection(rectangle, overlapping_rectangle): + x = abs(overlapping_rectangle.x - rectangle.x) + y = abs(overlapping_rectangle.y - rectangle.y) + width = overlapping_rectangle.width + height = overlapping_rectangle.height + return Rectangle(x, y, width, height) + + @staticmethod + def crop_rectangle(img, rectangle): + return img[rectangle.y:rectangle.y2, rectangle.x:rectangle.x2] diff --git 
a/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py b/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py index 2b1d5e6461..2a3fc8c434 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py +++ b/apps/opencv_stitching_tool/opencv_stitching/feature_matcher.py @@ -19,10 +19,10 @@ class FeatureMatcher: self.matcher = cv.detail_AffineBestOf2NearestMatcher(**kwargs) elif range_width == -1: """https://docs.opencv.org/5.x/d4/d26/classcv_1_1detail_1_1BestOf2NearestMatcher.html""" # noqa - self.matcher = cv.detail.BestOf2NearestMatcher_create(**kwargs) + self.matcher = cv.detail_BestOf2NearestMatcher(**kwargs) else: """https://docs.opencv.org/5.x/d8/d72/classcv_1_1detail_1_1BestOf2NearestRangeMatcher.html""" # noqa - self.matcher = cv.detail.BestOf2NearestRangeMatcher_create( + self.matcher = cv.detail_BestOf2NearestRangeMatcher( range_width, **kwargs ) diff --git a/apps/opencv_stitching_tool/opencv_stitching/image_handler.py b/apps/opencv_stitching_tool/opencv_stitching/image_handler.py index a3b76b288a..3be9ff4817 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/image_handler.py +++ b/apps/opencv_stitching_tool/opencv_stitching/image_handler.py @@ -1,6 +1,6 @@ import cv2 as cv -from .megapix_downscaler import MegapixDownscaler +from .megapix_scaler import MegapixDownscaler from .stitching_error import StitchingError class ImageHandler: @@ -35,7 +35,7 @@ class ImageHandler: def resize_to_low_resolution(self, medium_imgs=None): if medium_imgs and self.scales_set: - return self.resize_medium_to_low(medium_imgs) + return self.resize_imgs_by_scaler(medium_imgs, self.low_scaler) return self.read_and_resize_imgs(self.low_scaler) def resize_to_final_resolution(self): @@ -45,9 +45,9 @@ class ImageHandler: for img, size in self.input_images(): yield self.resize_img_by_scaler(scaler, size, img) - def resize_medium_to_low(self, medium_imgs): + def resize_imgs_by_scaler(self, medium_imgs, scaler): for img, size in zip(medium_imgs, 
self.img_sizes): - yield self.resize_img_by_scaler(self.low_scaler, size, img) + yield self.resize_img_by_scaler(scaler, size, img) @staticmethod def resize_img_by_scaler(scaler, size, img): @@ -92,3 +92,14 @@ class ImageHandler: def get_final_to_low_ratio(self): return self.low_scaler.scale / self.final_scaler.scale + + def get_low_to_final_ratio(self): + return self.final_scaler.scale / self.low_scaler.scale + + def get_final_img_sizes(self): + return [self.final_scaler.get_scaled_img_size(sz) + for sz in self.img_sizes] + + def get_low_img_sizes(self): + return [self.low_scaler.get_scaled_img_size(sz) + for sz in self.img_sizes] diff --git a/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py b/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py new file mode 100644 index 0000000000..5f0a82f7b9 --- /dev/null +++ b/apps/opencv_stitching_tool/opencv_stitching/largest_interior_rectangle.py @@ -0,0 +1,303 @@ +import numpy as np +import numba as nb +import cv2 as cv + +from .stitching_error import StitchingError + + +def largest_interior_rectangle(cells): + outline = get_outline(cells) + adjacencies = adjacencies_all_directions(cells) + s_map, _, saddle_candidates_map = create_maps(outline, adjacencies) + lir1 = biggest_span_in_span_map(s_map) + + candidate_cells = cells_of_interest(saddle_candidates_map) + s_map = span_map(adjacencies[0], adjacencies[2], candidate_cells) + lir2 = biggest_span_in_span_map(s_map) + + lir = biggest_rectangle(lir1, lir2) + return lir + + +def get_outline(cells): + contours, hierarchy = \ + cv.findContours(cells, cv.RETR_TREE, cv.CHAIN_APPROX_NONE) + # TODO support multiple contours + # test that only one regular contour exists + if not hierarchy.shape == (1, 1, 4) or not np.all(hierarchy == -1): + raise StitchingError("Invalid Contour. 
Try without cropping.") + contour = contours[0][:, 0, :] + x_values = contour[:, 0].astype("uint32", order="C") + y_values = contour[:, 1].astype("uint32", order="C") + return x_values, y_values + + +@nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True) +def horizontal_adjacency(cells, direction): + result = np.zeros(cells.shape, dtype=np.uint32) + for y in nb.prange(cells.shape[0]): + span = 0 + if direction: + iterator = range(cells.shape[1]-1, -1, -1) + else: + iterator = range(cells.shape[1]) + for x in iterator: + if cells[y, x] > 0: + span += 1 + else: + span = 0 + result[y, x] = span + return result + + +@nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True) +def vertical_adjacency(cells, direction): + result = np.zeros(cells.shape, dtype=np.uint32) + for x in nb.prange(cells.shape[1]): + span = 0 + if direction: + iterator = range(cells.shape[0]-1, -1, -1) + else: + iterator = range(cells.shape[0]) + for y in iterator: + if cells[y, x] > 0: + span += 1 + else: + span = 0 + result[y, x] = span + return result + + +@nb.njit(cache=True) +def adjacencies_all_directions(cells): + h_left2right = horizontal_adjacency(cells, 1) + h_right2left = horizontal_adjacency(cells, 0) + v_top2bottom = vertical_adjacency(cells, 1) + v_bottom2top = vertical_adjacency(cells, 0) + return h_left2right, h_right2left, v_top2bottom, v_bottom2top + + +@nb.njit('uint32(uint32[:])', cache=True) +def predict_vector_size(array): + zero_indices = np.where(array == 0)[0] + if len(zero_indices) == 0: + if len(array) == 0: + return 0 + return len(array) + return zero_indices[0] + + +@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) +def h_vector_top2bottom(h_adjacency, x, y): + vector_size = predict_vector_size(h_adjacency[y:, x]) + h_vector = np.zeros(vector_size, dtype=np.uint32) + h = np.Inf + for p in range(vector_size): + h = np.minimum(h_adjacency[y+p, x], h) + h_vector[p] = h + h_vector = np.unique(h_vector)[::-1] + return 
h_vector + + +@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) +def h_vector_bottom2top(h_adjacency, x, y): + vector_size = predict_vector_size(np.flip(h_adjacency[:y+1, x])) + h_vector = np.zeros(vector_size, dtype=np.uint32) + h = np.Inf + for p in range(vector_size): + h = np.minimum(h_adjacency[y-p, x], h) + h_vector[p] = h + h_vector = np.unique(h_vector)[::-1] + return h_vector + + +@nb.njit(cache=True) +def h_vectors_all_directions(h_left2right, h_right2left, x, y): + h_l2r_t2b = h_vector_top2bottom(h_left2right, x, y) + h_r2l_t2b = h_vector_top2bottom(h_right2left, x, y) + h_l2r_b2t = h_vector_bottom2top(h_left2right, x, y) + h_r2l_b2t = h_vector_bottom2top(h_right2left, x, y) + return h_l2r_t2b, h_r2l_t2b, h_l2r_b2t, h_r2l_b2t + + +@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) +def v_vector_left2right(v_adjacency, x, y): + vector_size = predict_vector_size(v_adjacency[y, x:]) + v_vector = np.zeros(vector_size, dtype=np.uint32) + v = np.Inf + for q in range(vector_size): + v = np.minimum(v_adjacency[y, x+q], v) + v_vector[q] = v + v_vector = np.unique(v_vector)[::-1] + return v_vector + + +@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) +def v_vector_right2left(v_adjacency, x, y): + vector_size = predict_vector_size(np.flip(v_adjacency[y, :x+1])) + v_vector = np.zeros(vector_size, dtype=np.uint32) + v = np.Inf + for q in range(vector_size): + v = np.minimum(v_adjacency[y, x-q], v) + v_vector[q] = v + v_vector = np.unique(v_vector)[::-1] + return v_vector + + +@nb.njit(cache=True) +def v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y): + v_l2r_t2b = v_vector_left2right(v_top2bottom, x, y) + v_r2l_t2b = v_vector_right2left(v_top2bottom, x, y) + v_l2r_b2t = v_vector_left2right(v_bottom2top, x, y) + v_r2l_b2t = v_vector_right2left(v_bottom2top, x, y) + return v_l2r_t2b, v_r2l_t2b, v_l2r_b2t, v_r2l_b2t + + +@nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True) +def spans(h_vector, v_vector): + 
spans = np.stack((h_vector, v_vector[::-1]), axis=1) + return spans + + +@nb.njit('uint32[:](uint32[:,:])', cache=True) +def biggest_span(spans): + if len(spans) == 0: + return np.array([0, 0], dtype=np.uint32) + areas = spans[:, 0] * spans[:, 1] + biggest_span_index = np.where(areas == np.amax(areas))[0][0] + return spans[biggest_span_index] + + +@nb.njit(cache=True) +def spans_all_directions(h_vectors, v_vectors): + span_l2r_t2b = spans(h_vectors[0], v_vectors[0]) + span_r2l_t2b = spans(h_vectors[1], v_vectors[1]) + span_l2r_b2t = spans(h_vectors[2], v_vectors[2]) + span_r2l_b2t = spans(h_vectors[3], v_vectors[3]) + return span_l2r_t2b, span_r2l_t2b, span_l2r_b2t, span_r2l_b2t + + +@nb.njit(cache=True) +def get_n_directions(spans_all_directions): + n_directions = 1 + for spans in spans_all_directions: + all_x_1 = np.all(spans[:, 0] == 1) + all_y_1 = np.all(spans[:, 1] == 1) + if not all_x_1 and not all_y_1: + n_directions += 1 + return n_directions + + +@nb.njit(cache=True) +def get_xy_array(x, y, spans, mode=0): + """0 - flip none, 1 - flip x, 2 - flip y, 3 - flip both""" + xy = spans.copy() + xy[:, 0] = x + xy[:, 1] = y + if mode == 1: + xy[:, 0] = xy[:, 0] - spans[:, 0] + 1 + if mode == 2: + xy[:, 1] = xy[:, 1] - spans[:, 1] + 1 + if mode == 3: + xy[:, 0] = xy[:, 0] - spans[:, 0] + 1 + xy[:, 1] = xy[:, 1] - spans[:, 1] + 1 + return xy + + +@nb.njit(cache=True) +def get_xy_arrays(x, y, spans_all_directions): + xy_l2r_t2b = get_xy_array(x, y, spans_all_directions[0], 0) + xy_r2l_t2b = get_xy_array(x, y, spans_all_directions[1], 1) + xy_l2r_b2t = get_xy_array(x, y, spans_all_directions[2], 2) + xy_r2l_b2t = get_xy_array(x, y, spans_all_directions[3], 3) + return xy_l2r_t2b, xy_r2l_t2b, xy_l2r_b2t, xy_r2l_b2t + + +@nb.njit(cache=True) +def point_on_outline(x, y, outline): + x_vals, y_vals = outline + x_true = x_vals == x + y_true = y_vals == y + both_true = np.logical_and(x_true, y_true) + return np.any(both_true) + + +@nb.njit('Tuple((uint32[:,:,::1], 
uint8[:,::1], uint8[:,::1]))' + '(UniTuple(uint32[:], 2), UniTuple(uint32[:,::1], 4))', + parallel=True, cache=True) +def create_maps(outline, adjacencies): + x_values, y_values = outline + h_left2right, h_right2left, v_top2bottom, v_bottom2top = adjacencies + + shape = h_left2right.shape + span_map = np.zeros(shape + (2,), "uint32") + direction_map = np.zeros(shape, "uint8") + saddle_candidates_map = np.zeros(shape, "uint8") + + for idx in nb.prange(len(x_values)): + x, y = x_values[idx], y_values[idx] + h_vectors = h_vectors_all_directions(h_left2right, h_right2left, x, y) + v_vectors = v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y) + span_arrays = spans_all_directions(h_vectors, v_vectors) + n = get_n_directions(span_arrays) + direction_map[y, x] = n + xy_arrays = get_xy_arrays(x, y, span_arrays) + for direction_idx in range(4): + xy_array = xy_arrays[direction_idx] + span_array = span_arrays[direction_idx] + for span_idx in range(span_array.shape[0]): + x, y = xy_array[span_idx][0], xy_array[span_idx][1] + w, h = span_array[span_idx][0], span_array[span_idx][1] + if w*h > span_map[y, x, 0] * span_map[y, x, 1]: + span_map[y, x, :] = np.array([w, h], "uint32") + if n == 3 and not point_on_outline(x, y, outline): + saddle_candidates_map[y, x] = np.uint8(255) + + return span_map, direction_map, saddle_candidates_map + + +def cells_of_interest(cells): + y_vals, x_vals = cells.nonzero() + x_vals = x_vals.astype("uint32", order="C") + y_vals = y_vals.astype("uint32", order="C") + return x_vals, y_vals + + +@nb.njit('uint32[:, :, :]' + '(uint32[:,::1], uint32[:,::1], UniTuple(uint32[:], 2))', + parallel=True, cache=True) +def span_map(h_adjacency_left2right, + v_adjacency_top2bottom, + cells_of_interest): + + x_values, y_values = cells_of_interest + + span_map = np.zeros(h_adjacency_left2right.shape + (2,), dtype=np.uint32) + + for idx in nb.prange(len(x_values)): + x, y = x_values[idx], y_values[idx] + h_vector = h_vector_top2bottom(h_adjacency_left2right, 
x, y) + v_vector = v_vector_left2right(v_adjacency_top2bottom, x, y) + s = spans(h_vector, v_vector) + s = biggest_span(s) + span_map[y, x, :] = s + + return span_map + + +@nb.njit('uint32[:](uint32[:, :, :])', cache=True) +def biggest_span_in_span_map(span_map): + areas = span_map[:, :, 0] * span_map[:, :, 1] + largest_rectangle_indices = np.where(areas == np.amax(areas)) + x = largest_rectangle_indices[1][0] + y = largest_rectangle_indices[0][0] + span = span_map[y, x] + return np.array([x, y, span[0], span[1]], dtype=np.uint32) + + +def biggest_rectangle(*args): + biggest_rect = np.array([0, 0, 0, 0], dtype=np.uint32) + for rect in args: + if rect[2] * rect[3] > biggest_rect[2] * biggest_rect[3]: + biggest_rect = rect + return biggest_rect diff --git a/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py b/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py deleted file mode 100644 index f7553acc2e..0000000000 --- a/apps/opencv_stitching_tool/opencv_stitching/megapix_downscaler.py +++ /dev/null @@ -1,12 +0,0 @@ -from .megapix_scaler import MegapixScaler - - -class MegapixDownscaler(MegapixScaler): - - @staticmethod - def force_downscale(scale): - return min(1.0, scale) - - def set_scale(self, scale): - scale = self.force_downscale(scale) - super().set_scale(scale) diff --git a/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py b/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py index 96d47536f9..a7be8ad3dc 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py +++ b/apps/opencv_stitching_tool/opencv_stitching/megapix_scaler.py @@ -25,3 +25,14 @@ class MegapixScaler: width = int(round(img_size[0] * self.scale)) height = int(round(img_size[1] * self.scale)) return (width, height) + + +class MegapixDownscaler(MegapixScaler): + + @staticmethod + def force_downscale(scale): + return min(1.0, scale) + + def set_scale(self, scale): + scale = self.force_downscale(scale) + super().set_scale(scale) diff 
--git a/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py b/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py deleted file mode 100644 index e3a45773ea..0000000000 --- a/apps/opencv_stitching_tool/opencv_stitching/panorama_estimation.py +++ /dev/null @@ -1,27 +0,0 @@ -import statistics - - -def estimate_final_panorama_dimensions(cameras, warper, img_handler): - medium_to_final_ratio = img_handler.get_medium_to_final_ratio() - - panorama_scale_determined_on_medium_img = \ - estimate_panorama_scale(cameras) - - panorama_scale = (panorama_scale_determined_on_medium_img * - medium_to_final_ratio) - panorama_corners = [] - panorama_sizes = [] - - for size, camera in zip(img_handler.img_sizes, cameras): - width, height = img_handler.final_scaler.get_scaled_img_size(size) - roi = warper.warp_roi(width, height, camera, panorama_scale, medium_to_final_ratio) - panorama_corners.append(roi[0:2]) - panorama_sizes.append(roi[2:4]) - - return panorama_scale, panorama_corners, panorama_sizes - - -def estimate_panorama_scale(cameras): - focals = [cam.focal for cam in cameras] - panorama_scale = statistics.median(focals) - return panorama_scale diff --git a/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py b/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py index 2ef86092ab..959880ef02 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py +++ b/apps/opencv_stitching_tool/opencv_stitching/seam_finder.py @@ -63,7 +63,14 @@ class SeamFinder: return cv.dilate(seam_lines, kernel) @staticmethod - def blend_seam_masks(seam_masks, corners, sizes, colors=[ + def blend_seam_masks(seam_masks, corners, sizes): + imgs = colored_img_generator(sizes) + blended_seam_masks, _ = \ + Blender.create_panorama(imgs, seam_masks, corners, sizes) + return blended_seam_masks + + +def colored_img_generator(sizes, colors=( (255, 000, 000), # Blue (000, 000, 255), # Red (000, 255, 000), # Green @@ -72,21 +79,13 @@ class SeamFinder: (128, 
128, 255), # Pink (128, 128, 128), # Gray (000, 000, 128), # Brown - (000, 128, 255)] # Orange + (000, 128, 255)) # Orange ): - - blender = Blender("no") - blender.prepare(corners, sizes) - - for idx, (seam_mask, size, corner) in enumerate( - zip(seam_masks, sizes, corners)): - if idx+1 > len(colors): - raise ValueError("Not enough default colors! Pass additional " - "colors to \"colors\" parameter") - one_color_img = create_img_by_size(size, colors[idx]) - blender.feed(one_color_img, seam_mask, corner) - - return blender.blend() + for idx, size in enumerate(sizes): + if idx+1 > len(colors): + raise ValueError("Not enough default colors! Pass additional " + "colors to \"colors\" parameter") + yield create_img_by_size(size, colors[idx]) def create_img_by_size(size, color=(0, 0, 0)): diff --git a/apps/opencv_stitching_tool/opencv_stitching/stitcher.py b/apps/opencv_stitching_tool/opencv_stitching/stitcher.py index c08112664f..2419092420 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/stitcher.py +++ b/apps/opencv_stitching_tool/opencv_stitching/stitcher.py @@ -8,7 +8,7 @@ from .camera_estimator import CameraEstimator from .camera_adjuster import CameraAdjuster from .camera_wave_corrector import WaveCorrector from .warper import Warper -from .panorama_estimation import estimate_final_panorama_dimensions +from .cropper import Cropper from .exposure_error_compensator import ExposureErrorCompensator from .seam_finder import SeamFinder from .blender import Blender @@ -33,6 +33,7 @@ class Stitcher: "wave_correct_kind": WaveCorrector.DEFAULT_WAVE_CORRECTION, "warper_type": Warper.DEFAULT_WARP_TYPE, "low_megapix": ImageHandler.DEFAULT_LOW_MEGAPIX, + "crop": Cropper.DEFAULT_CROP, "compensator": ExposureErrorCompensator.DEFAULT_COMPENSATOR, "nr_feeds": ExposureErrorCompensator.DEFAULT_NR_FEEDS, "block_size": ExposureErrorCompensator.DEFAULT_BLOCK_SIZE, @@ -68,6 +69,7 @@ class Stitcher: CameraAdjuster(args.adjuster, args.refinement_mask) self.wave_corrector = 
WaveCorrector(args.wave_correct_kind) self.warper = Warper(args.warper_type) + self.cropper = Cropper(args.crop) self.compensator = \ ExposureErrorCompensator(args.compensator, args.nr_feeds, args.block_size) @@ -77,7 +79,6 @@ class Stitcher: def stitch(self, img_names): self.initialize_registration(img_names) - imgs = self.resize_medium_resolution() features = self.find_features(imgs) matches = self.match_features(features) @@ -85,22 +86,26 @@ class Stitcher: cameras = self.estimate_camera_parameters(features, matches) cameras = self.refine_camera_parameters(features, matches, cameras) cameras = self.perform_wave_correction(cameras) - panorama_scale, panorama_corners, panorama_sizes = \ - self.estimate_final_panorama_dimensions(cameras) - - self.initialize_composition(panorama_corners, panorama_sizes) + self.estimate_scale(cameras) imgs = self.resize_low_resolution(imgs) - imgs = self.warp_low_resolution_images(imgs, cameras, panorama_scale) - self.estimate_exposure_errors(imgs) - seam_masks = self.find_seam_masks(imgs) + imgs, masks, corners, sizes = self.warp_low_resolution(imgs, cameras) + self.prepare_cropper(imgs, masks, corners, sizes) + imgs, masks, corners, sizes = \ + self.crop_low_resolution(imgs, masks, corners, sizes) + self.estimate_exposure_errors(corners, imgs, masks) + seam_masks = self.find_seam_masks(imgs, corners, masks) imgs = self.resize_final_resolution() - imgs = self.warp_final_resolution_images(imgs, cameras, panorama_scale) - imgs = self.compensate_exposure_errors(imgs) + imgs, masks, corners, sizes = self.warp_final_resolution(imgs, cameras) + imgs, masks, corners, sizes = \ + self.crop_final_resolution(imgs, masks, corners, sizes) + self.set_masks(masks) + imgs = self.compensate_exposure_errors(corners, imgs) seam_masks = self.resize_seam_masks(seam_masks) - self.blend_images(imgs, seam_masks) + self.initialize_composition(corners, sizes) + self.blend_images(imgs, seam_masks, corners) return self.create_final_panorama() def 
initialize_registration(self, img_names): @@ -132,9 +137,77 @@ class Stitcher: def perform_wave_correction(self, cameras): return self.wave_corrector.correct(cameras) - def estimate_final_panorama_dimensions(self, cameras): - return estimate_final_panorama_dimensions(cameras, self.warper, - self.img_handler) + def estimate_scale(self, cameras): + self.warper.set_scale(cameras) + + def resize_low_resolution(self, imgs=None): + return list(self.img_handler.resize_to_low_resolution(imgs)) + + def warp_low_resolution(self, imgs, cameras): + sizes = self.img_handler.get_low_img_sizes() + camera_aspect = self.img_handler.get_medium_to_low_ratio() + imgs, masks, corners, sizes = \ + self.warp(imgs, cameras, sizes, camera_aspect) + return list(imgs), list(masks), corners, sizes + + def warp_final_resolution(self, imgs, cameras): + sizes = self.img_handler.get_final_img_sizes() + camera_aspect = self.img_handler.get_medium_to_final_ratio() + return self.warp(imgs, cameras, sizes, camera_aspect) + + def warp(self, imgs, cameras, sizes, aspect=1): + imgs = self.warper.warp_images(imgs, cameras, aspect) + masks = self.warper.create_and_warp_masks(sizes, cameras, aspect) + corners, sizes = self.warper.warp_rois(sizes, cameras, aspect) + return imgs, masks, corners, sizes + + def prepare_cropper(self, imgs, masks, corners, sizes): + self.cropper.prepare(imgs, masks, corners, sizes) + + def crop_low_resolution(self, imgs, masks, corners, sizes): + imgs, masks, corners, sizes = self.crop(imgs, masks, corners, sizes) + return list(imgs), list(masks), corners, sizes + + def crop_final_resolution(self, imgs, masks, corners, sizes): + lir_aspect = self.img_handler.get_low_to_final_ratio() + return self.crop(imgs, masks, corners, sizes, lir_aspect) + + def crop(self, imgs, masks, corners, sizes, aspect=1): + masks = self.cropper.crop_images(masks, aspect) + imgs = self.cropper.crop_images(imgs, aspect) + corners, sizes = self.cropper.crop_rois(corners, sizes, aspect) + return imgs, 
masks, corners, sizes + + def estimate_exposure_errors(self, corners, imgs, masks): + self.compensator.feed(corners, imgs, masks) + + def find_seam_masks(self, imgs, corners, masks): + return self.seam_finder.find(imgs, corners, masks) + + def resize_final_resolution(self): + return self.img_handler.resize_to_final_resolution() + + def compensate_exposure_errors(self, corners, imgs): + for idx, (corner, img) in enumerate(zip(corners, imgs)): + yield self.compensator.apply(idx, corner, img, self.get_mask(idx)) + + def resize_seam_masks(self, seam_masks): + for idx, seam_mask in enumerate(seam_masks): + yield SeamFinder.resize(seam_mask, self.get_mask(idx)) + + def set_masks(self, mask_generator): + self.masks = mask_generator + self.mask_index = -1 + + def get_mask(self, idx): + if idx == self.mask_index + 1: + self.mask_index += 1 + self.mask = next(self.masks) + return self.mask + elif idx == self.mask_index: + return self.mask + else: + raise StitchingError("Invalid Mask Index!") def initialize_composition(self, corners, sizes): if self.timelapser.do_timelapse: @@ -142,66 +215,22 @@ class Stitcher: else: self.blender.prepare(corners, sizes) - def resize_low_resolution(self, imgs=None): - return list(self.img_handler.resize_to_low_resolution(imgs)) - - def warp_low_resolution_images(self, imgs, cameras, final_scale): - camera_aspect = self.img_handler.get_medium_to_low_ratio() - scale = final_scale * self.img_handler.get_final_to_low_ratio() - return list(self.warp_images(imgs, cameras, scale, camera_aspect)) - - def warp_final_resolution_images(self, imgs, cameras, scale): - camera_aspect = self.img_handler.get_medium_to_final_ratio() - return self.warp_images(imgs, cameras, scale, camera_aspect) - - def warp_images(self, imgs, cameras, scale, aspect=1): - self._masks = [] - self._corners = [] - for img_warped, mask_warped, corner in \ - self.warper.warp_images_and_image_masks( - imgs, cameras, scale, aspect - ): - self._masks.append(mask_warped) - 
self._corners.append(corner) - yield img_warped - - def estimate_exposure_errors(self, imgs): - self.compensator.feed(self._corners, imgs, self._masks) - - def find_seam_masks(self, imgs): - return self.seam_finder.find(imgs, self._corners, self._masks) - - def resize_final_resolution(self): - return self.img_handler.resize_to_final_resolution() - - def compensate_exposure_errors(self, imgs): - for idx, img in enumerate(imgs): - yield self.compensator.apply(idx, self._corners[idx], - img, self._masks[idx]) - - def resize_seam_masks(self, seam_masks): - for idx, seam_mask in enumerate(seam_masks): - yield SeamFinder.resize(seam_mask, self._masks[idx]) - - def blend_images(self, imgs, masks): - for idx, (img, mask) in enumerate(zip(imgs, masks)): + def blend_images(self, imgs, masks, corners): + for idx, (img, mask, corner) in enumerate(zip(imgs, masks, corners)): if self.timelapser.do_timelapse: self.timelapser.process_and_save_frame( - self.img_handler.img_names[idx], img, self._corners[idx] + self.img_handler.img_names[idx], img, corner ) else: - self.blender.feed(img, mask, self._corners[idx]) + self.blender.feed(img, mask, corner) def create_final_panorama(self): if not self.timelapser.do_timelapse: - return self.blender.blend() + panorama, _ = self.blender.blend() + return panorama @staticmethod def validate_kwargs(kwargs): for arg in kwargs: if arg not in Stitcher.DEFAULT_SETTINGS: raise StitchingError("Invalid Argument: " + arg) - - def collect_garbage(self): - del self.img_handler.img_names, self.img_handler.img_sizes, - del self._corners, self._masks diff --git a/apps/opencv_stitching_tool/opencv_stitching/subsetter.py b/apps/opencv_stitching_tool/opencv_stitching/subsetter.py index 4ea6acc60d..e037984530 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/subsetter.py +++ b/apps/opencv_stitching_tool/opencv_stitching/subsetter.py @@ -44,13 +44,12 @@ class Subsetter: indices = cv.detail.leaveBiggestComponent(features, pairwise_matches, 
self.confidence_threshold) - indices_as_list = [int(idx) for idx in list(indices[:, 0])] - if len(indices_as_list) < 2: + if len(indices) < 2: raise StitchingError("No match exceeds the " "given confidence theshold.") - return indices_as_list + return indices @staticmethod def subset_list(list_to_subset, indices): diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py index 0afdad2628..0dc5b8fbbf 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py +++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_megapix_scaler.py @@ -7,9 +7,8 @@ import cv2 as cv sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) -from opencv_stitching.megapix_scaler import MegapixScaler -from opencv_stitching.megapix_downscaler import MegapixDownscaler -#%% +from opencv_stitching.megapix_scaler import MegapixScaler, MegapixDownscaler +# %% class TestScaler(unittest.TestCase): diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py index 60b03a8bfe..2028ed8b5c 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py +++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_performance.py @@ -14,6 +14,7 @@ from stitching_detailed import main class TestStitcher(unittest.TestCase): + @unittest.skip("skip performance test (not needed in every run)") def test_performance(self): print("Run new Stitcher class:") @@ -25,7 +26,6 @@ class TestStitcher(unittest.TestCase): stitcher.stitch(["boat5.jpg", "boat2.jpg", "boat3.jpg", "boat4.jpg", "boat1.jpg", "boat6.jpg"]) - stitcher.collect_garbage() _, peak_memory = tracemalloc.get_traced_memory() tracemalloc.stop() diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py 
b/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py index 98e792fd01..15b851e433 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py +++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_registration.py @@ -70,8 +70,8 @@ class TestImageRegistration(unittest.TestCase): indices_to_delete = subsetter.get_indices_to_delete(len(img_names), indices) - self.assertEqual(indices, [2, 3, 4]) - self.assertEqual(indices_to_delete, [0, 1]) + np.testing.assert_array_equal(indices, np.array([2, 3, 4])) + np.testing.assert_array_equal(indices_to_delete, np.array([0, 1])) subsetted_image_names = subsetter.subset_list(img_names, indices) self.assertEqual(subsetted_image_names, diff --git a/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py b/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py index 5a24f752c0..d97300dadd 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py +++ b/apps/opencv_stitching_tool/opencv_stitching/test/test_stitcher.py @@ -15,7 +15,7 @@ from opencv_stitching.stitcher import Stitcher class TestStitcher(unittest.TestCase): def test_stitcher_aquaduct(self): - stitcher = Stitcher(n_features=250) + stitcher = Stitcher(nfeatures=250) result = stitcher.stitch(["s1.jpg", "s2.jpg"]) cv.imwrite("result.jpg", result) @@ -30,7 +30,7 @@ class TestStitcher(unittest.TestCase): "wave_correct_kind": "no", "finder": "dp_colorgrad", "compensator": "no", - "conf_thresh": 0.3} + "confidence_threshold": 0.3} stitcher = Stitcher(**settings) result = stitcher.stitch(["boat5.jpg", "boat2.jpg", @@ -49,7 +49,7 @@ class TestStitcher(unittest.TestCase): settings = {"warper_type": "compressedPlaneA2B1", "finder": "dp_colorgrad", "compensator": "channel_blocks", - "conf_thresh": 0.3} + "confidence_threshold": 0.3} stitcher = Stitcher(**settings) result = stitcher.stitch(["boat5.jpg", "boat2.jpg", @@ -64,7 +64,7 @@ class TestStitcher(unittest.TestCase): 
atol=max_image_shape_derivation) def test_stitcher_boat_aquaduct_subset(self): - settings = {"final_megapix": 1} + settings = {"final_megapix": 1, "crop": True} stitcher = Stitcher(**settings) result = stitcher.stitch(["boat5.jpg", @@ -76,7 +76,7 @@ class TestStitcher(unittest.TestCase): max_image_shape_derivation = 100 np.testing.assert_allclose(result.shape[:2], - (839, 3384), + (705, 3374), atol=max_image_shape_derivation) def test_stitcher_budapest(self): diff --git a/apps/opencv_stitching_tool/opencv_stitching/warper.py b/apps/opencv_stitching_tool/opencv_stitching/warper.py index 47f2ec0adb..44ecb00f5f 100644 --- a/apps/opencv_stitching_tool/opencv_stitching/warper.py +++ b/apps/opencv_stitching_tool/opencv_stitching/warper.py @@ -1,3 +1,5 @@ +from statistics import median + import cv2 as cv import numpy as np @@ -15,48 +17,54 @@ class Warper: DEFAULT_WARP_TYPE = 'spherical' - def __init__(self, warper_type=DEFAULT_WARP_TYPE, scale=1): + def __init__(self, warper_type=DEFAULT_WARP_TYPE): self.warper_type = warper_type - self.warper = cv.PyRotationWarper(warper_type, scale) - self.scale = scale + self.scale = None - def warp_images_and_image_masks(self, imgs, cameras, scale=None, aspect=1): - self.update_scale(scale) + def set_scale(self, cameras): + focals = [cam.focal for cam in cameras] + self.scale = median(focals) + + def warp_images(self, imgs, cameras, aspect=1): for img, camera in zip(imgs, cameras): - yield self.warp_image_and_image_mask(img, camera, scale, aspect) + yield self.warp_image(img, camera, aspect) - def warp_image_and_image_mask(self, img, camera, scale=None, aspect=1): - self.update_scale(scale) - corner, img_warped = self.warp_image(img, camera, aspect) - mask = 255 * np.ones((img.shape[0], img.shape[1]), np.uint8) - _, mask_warped = self.warp_image(mask, camera, aspect, mask=True) - return img_warped, mask_warped, corner + def warp_image(self, img, camera, aspect=1): + warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect) + 
_, warped_image = warper.warp(img, + Warper.get_K(camera, aspect), + camera.R, + cv.INTER_LINEAR, + cv.BORDER_REFLECT) + return warped_image - def warp_image(self, image, camera, aspect=1, mask=False): - if mask: - interp_mode = cv.INTER_NEAREST - border_mode = cv.BORDER_CONSTANT - else: - interp_mode = cv.INTER_LINEAR - border_mode = cv.BORDER_REFLECT + def create_and_warp_masks(self, sizes, cameras, aspect=1): + for size, camera in zip(sizes, cameras): + yield self.create_and_warp_mask(size, camera, aspect) - corner, warped_image = self.warper.warp(image, - Warper.get_K(camera, aspect), - camera.R, - interp_mode, - border_mode) - return corner, warped_image + def create_and_warp_mask(self, size, camera, aspect=1): + warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect) + mask = 255 * np.ones((size[1], size[0]), np.uint8) + _, warped_mask = warper.warp(mask, + Warper.get_K(camera, aspect), + camera.R, + cv.INTER_NEAREST, + cv.BORDER_CONSTANT) + return warped_mask - def warp_roi(self, width, height, camera, scale=None, aspect=1): - self.update_scale(scale) - roi = (width, height) + def warp_rois(self, sizes, cameras, aspect=1): + roi_corners = [] + roi_sizes = [] + for size, camera in zip(sizes, cameras): + roi = self.warp_roi(size, camera, aspect) + roi_corners.append(roi[0:2]) + roi_sizes.append(roi[2:4]) + return roi_corners, roi_sizes + + def warp_roi(self, size, camera, aspect=1): + warper = cv.PyRotationWarper(self.warper_type, self.scale*aspect) K = Warper.get_K(camera, aspect) - return self.warper.warpRoi(roi, K, camera.R) - - def update_scale(self, scale): - if scale is not None and scale != self.scale: - self.warper = cv.PyRotationWarper(self.warper_type, scale) # setScale not working: https://docs.opencv.org/5.x/d5/d76/classcv_1_1PyRotationWarper.html#a90b000bb75f95294f9b0b6ec9859eb55 - self.scale = scale + return warper.warpRoi(size, K, camera.R) @staticmethod def get_K(camera, aspect=1): diff --git
a/apps/opencv_stitching_tool/opencv_stitching_tool.py b/apps/opencv_stitching_tool/opencv_stitching_tool.py index 1ee96aa8cb..2e41c11b87 100644 --- a/apps/opencv_stitching_tool/opencv_stitching_tool.py +++ b/apps/opencv_stitching_tool/opencv_stitching_tool.py @@ -23,6 +23,7 @@ from opencv_stitching.camera_estimator import CameraEstimator from opencv_stitching.camera_adjuster import CameraAdjuster from opencv_stitching.camera_wave_corrector import WaveCorrector from opencv_stitching.warper import Warper +from opencv_stitching.cropper import Cropper from opencv_stitching.exposure_error_compensator import ExposureErrorCompensator # noqa from opencv_stitching.seam_finder import SeamFinder from opencv_stitching.blender import Blender @@ -72,9 +73,7 @@ parser.add_argument( type=int, dest='range_width' ) parser.add_argument( - '--try_use_gpu', - action='store', - default=False, + '--try_use_gpu', action='store', default=False, help="Try to use CUDA. The default value is no. " "All default values are for CPU mode.", type=bool, dest='try_use_gpu' @@ -146,6 +145,13 @@ parser.add_argument( "The default is %s Mpx." % ImageHandler.DEFAULT_LOW_MEGAPIX, type=float, dest='low_megapix' ) +parser.add_argument( + '--crop', action='store', default=Cropper.DEFAULT_CROP, + help="Crop black borders around images caused by warping using the " + "largest interior rectangle. " + "Default is '%s'." % Cropper.DEFAULT_CROP, + type=bool, dest='crop' +) parser.add_argument( '--compensator', action='store', default=ExposureErrorCompensator.DEFAULT_COMPENSATOR, diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 037c7fb5ba..bcb8a3e203 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -119,12 +119,12 @@ if(CV_GCC OR CV_CLANG) # we want. 
add_extra_compiler_option(-Wall) endif() - add_extra_compiler_option(-Werror=return-type) - add_extra_compiler_option(-Werror=non-virtual-dtor) - add_extra_compiler_option(-Werror=address) - add_extra_compiler_option(-Werror=sequence-point) + add_extra_compiler_option(-Wreturn-type) + add_extra_compiler_option(-Wnon-virtual-dtor) + add_extra_compiler_option(-Waddress) + add_extra_compiler_option(-Wsequence-point) add_extra_compiler_option(-Wformat) - add_extra_compiler_option(-Werror=format-security -Wformat) + add_extra_compiler_option(-Wformat-security -Wformat) add_extra_compiler_option(-Wmissing-declarations) add_extra_compiler_option(-Wmissing-prototypes) add_extra_compiler_option(-Wstrict-prototypes) @@ -367,6 +367,22 @@ if(NOT OPENCV_SKIP_LINK_AS_NEEDED) endif() endif() +# Apply "-Wl,--no-undefined" linker flags: https://github.com/opencv/opencv/pull/21347 +if(NOT OPENCV_SKIP_LINK_NO_UNDEFINED) + if(UNIX AND (NOT APPLE OR NOT CMAKE_VERSION VERSION_LESS "3.2")) + set(_option "-Wl,--no-undefined") + set(_saved_CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_option}") # requires CMake 3.2+ and CMP0056 + ocv_check_compiler_flag(CXX "" HAVE_LINK_NO_UNDEFINED) + set(CMAKE_EXE_LINKER_FLAGS "${_saved_CMAKE_EXE_LINKER_FLAGS}") + if(HAVE_LINK_NO_UNDEFINED) + set(OPENCV_EXTRA_EXE_LINKER_FLAGS "${OPENCV_EXTRA_EXE_LINKER_FLAGS} ${_option}") + set(OPENCV_EXTRA_SHARED_LINKER_FLAGS "${OPENCV_EXTRA_SHARED_LINKER_FLAGS} ${_option}") + set(OPENCV_EXTRA_MODULE_LINKER_FLAGS "${OPENCV_EXTRA_MODULE_LINKER_FLAGS} ${_option}") + endif() + endif() +endif() + # combine all "extra" options if(NOT OPENCV_SKIP_EXTRA_COMPILER_FLAGS) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 6521fbab93..319fd5bf0a 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ 
b/cmake/OpenCVDetectInferenceEngine.cmake @@ -1,101 +1,38 @@ -# The script detects Intel(R) Inference Engine installation -# -# Cache variables: -# INF_ENGINE_RELEASE - a number reflecting IE source interface (linked with OpenVINO release) -# -# Detect parameters: -# 1. Native cmake IE package: -# - environment variable InferenceEngine_DIR is set to location of cmake module -# 2. Custom location: -# - INF_ENGINE_INCLUDE_DIRS - headers search location -# - INF_ENGINE_LIB_DIRS - library search location -# 3. OpenVINO location: -# - environment variable INTEL_OPENVINO_DIR is set to location of OpenVINO installation dir -# - INF_ENGINE_PLATFORM - part of name of library directory representing its platform +# The script detects Intel(R) OpenVINO(TM) runtime installation # # Result: -# INF_ENGINE_TARGET - set to name of imported library target representing InferenceEngine -# +# - target ocv.3rdparty.openvino - -macro(ocv_ie_find_extra_libraries find_prefix find_suffix) - file(GLOB libraries "${INF_ENGINE_LIB_DIRS}/${find_prefix}inference_engine*${find_suffix}") - foreach(full_path IN LISTS libraries) - get_filename_component(library "${full_path}" NAME_WE) - string(REPLACE "${find_prefix}" "" library "${library}") - if(library STREQUAL "inference_engine" OR library STREQUAL "inference_engined") - # skip - else() - add_library(${library} UNKNOWN IMPORTED) - set_target_properties(${library} PROPERTIES - IMPORTED_LOCATION "${full_path}") - list(APPEND custom_libraries ${library}) - endif() - endforeach() -endmacro() - -function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg) - if(NOT _inc OR NOT (_lib OR _lib_rel OR _lib_dbg)) +if(WITH_OPENVINO) + find_package(OpenVINO QUIET) + if(OpenVINO_FOUND) + message(STATUS "OpenVINO FOUND: ${OpenVINO_VERSION}") + math(EXPR ver "${OpenVINO_VERSION_MAJOR} * 1000000 + ${OpenVINO_VERSION_MINOR} * 10000 + ${OpenVINO_VERSION_PATCH} * 100") + ocv_add_external_target(openvino "" "openvino::runtime" 
"INF_ENGINE_RELEASE=${ver};HAVE_NGRAPH;HAVE_DNN_NGRAPH;HAVE_INF_ENGINE") + set(HAVE_OPENVINO 1) return() endif() - if(NOT _lib) - if(_lib_rel) - set(_lib "${_lib_rel}") - else() - set(_lib "${_lib_dbg}") - endif() - endif() - add_library(inference_engine UNKNOWN IMPORTED) - set_target_properties(inference_engine PROPERTIES - IMPORTED_LOCATION "${_lib}" - IMPORTED_IMPLIB_RELEASE "${_lib_rel}" - IMPORTED_IMPLIB_DEBUG "${_lib_dbg}" - INTERFACE_INCLUDE_DIRECTORIES "${_inc}" - ) +endif() - set(custom_libraries "") - set(__prefixes "${CMAKE_FIND_LIBRARY_PREFIXES}") - if(NOT __prefixes) - set(__prefixes "_empty_") - endif() - foreach(find_prefix ${__prefixes}) - if(find_prefix STREQUAL "_empty_") # foreach doesn't iterate over empty elements - set(find_prefix "") - endif() - if(NOT DEFINED INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES) # allow custom override - set(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) - if(APPLE) - ocv_list_filterout(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES "^.so$") # skip plugins (can't be linked) - endif() - endif() - foreach(find_suffix ${INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES}) - ocv_ie_find_extra_libraries("${find_prefix}" "${find_suffix}") - endforeach() - if(NOT CMAKE_FIND_LIBRARY_SUFFIXES) - ocv_ie_find_extra_libraries("${find_prefix}" "") - endif() - endforeach() +# ====================== - if(NOT INF_ENGINE_RELEASE VERSION_GREATER "2018050000") - find_library(INF_ENGINE_OMP_LIBRARY iomp5 PATHS "${INF_ENGINE_OMP_DIR}" NO_DEFAULT_PATH) - if(NOT INF_ENGINE_OMP_LIBRARY) - message(WARNING "OpenMP for IE have not been found. 
Set INF_ENGINE_OMP_DIR variable if you experience build errors.") - endif() +if(WITH_OPENVINO) + find_package(OpenVINO QUIET) + if(OpenVINO_FOUND) + message(STATUS "OpenVINO FOUND: ${OpenVINO_VERSION}") + math(EXPR ver "${OpenVINO_VERSION_MAJOR} * 1000000 + ${OpenVINO_VERSION_MINOR} * 10000 + ${OpenVINO_VERSION_PATCH} * 100") + ocv_add_external_target(openvino "" "openvino::runtime" "INF_ENGINE_RELEASE=${ver};HAVE_NGRAPH;HAVE_DNN_NGRAPH;HAVE_INF_ENGINE") + set(HAVE_OPENVINO 1) + return() endif() - if(EXISTS "${INF_ENGINE_OMP_LIBRARY}") - set_target_properties(inference_engine PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${INF_ENGINE_OMP_LIBRARY}") - endif() - set(INF_ENGINE_VERSION "Unknown" CACHE STRING "") - set(INF_ENGINE_TARGET "inference_engine;${custom_libraries}" PARENT_SCOPE) - message(STATUS "Detected InferenceEngine: ${_msg}") -endfunction() +endif() # ====================== find_package(InferenceEngine QUIET) if(InferenceEngine_FOUND) set(INF_ENGINE_TARGET ${InferenceEngine_LIBRARIES}) - set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}" CACHE STRING "") + set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}") message(STATUS "Detected InferenceEngine: cmake package (${InferenceEngine_VERSION})") endif() @@ -113,47 +50,19 @@ elseif(DEFINED INF_ENGINE_RELEASE) endif() set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 
2020.1.0.2 -> 2020010002)") -if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS) - find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - find_library(ie_custom_lib_dbg "inference_engined" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH) # Win32 and MacOSX - endif() - find_library(ie_custom_lib "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH) - find_library(ie_custom_lib_rel "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Release" NO_DEFAULT_PATH) - find_library(ie_custom_lib_dbg "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Debug" NO_DEFAULT_PATH) - add_custom_ie_build("${ie_custom_inc}" "${ie_custom_lib}" "${ie_custom_lib_rel}" "${ie_custom_lib_dbg}" "INF_ENGINE_{INCLUDE,LIB}_DIRS") -endif() - -set(_loc "$ENV{INTEL_OPENVINO_DIR}") -if(NOT _loc AND DEFINED ENV{INTEL_CVSDK_DIR}) - set(_loc "$ENV{INTEL_CVSDK_DIR}") # OpenVINO 2018.x -endif() -if(NOT INF_ENGINE_TARGET AND _loc) - if(NOT INF_ENGINE_RELEASE VERSION_GREATER "2018050000") - set(INF_ENGINE_PLATFORM_DEFAULT "ubuntu_16.04") - else() - set(INF_ENGINE_PLATFORM_DEFAULT "") - endif() - set(INF_ENGINE_PLATFORM "${INF_ENGINE_PLATFORM_DEFAULT}" CACHE STRING "InferenceEngine platform (library dir)") - find_path(ie_custom_env_inc "inference_engine.hpp" PATHS "${_loc}/deployment_tools/inference_engine/include" NO_DEFAULT_PATH) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - find_library(ie_custom_env_lib_dbg "inference_engined" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH) - endif() - find_library(ie_custom_env_lib "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH) - find_library(ie_custom_env_lib_rel "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Release" NO_DEFAULT_PATH) - find_library(ie_custom_env_lib_dbg "inference_engine" PATHS 
"${_loc}/deployment_tools/inference_engine/lib/intel64/Debug" NO_DEFAULT_PATH) - add_custom_ie_build("${ie_custom_env_inc}" "${ie_custom_env_lib}" "${ie_custom_env_lib_rel}" "${ie_custom_env_lib_dbg}" "OpenVINO (${_loc})") -endif() +set(tgts) +set(defs) # Add more features to the target - if(INF_ENGINE_TARGET) set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) + list(APPEND tgts ${INF_ENGINE_TARGET}) + list(APPEND defs "INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" "HAVE_INF_ENGINE") endif() -if(WITH_NGRAPH) +if(WITH_NGRAPH OR NOT DEFINED WITH_NGRAPH) find_package(ngraph QUIET) if(ngraph_FOUND) ocv_assert(TARGET ngraph::ngraph) @@ -162,5 +71,9 @@ if(WITH_NGRAPH) endif() message(STATUS "Detected ngraph: cmake package (${ngraph_VERSION})") set(HAVE_NGRAPH ON) + list(APPEND tgts ngraph::ngraph) + list(APPEND defs "HAVE_NGRAPH" "HAVE_DNN_NGRAPH") endif() endif() + +ocv_add_external_target(openvino "" "${tgts}" "${defs}") diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 95d1d92f68..00886cc131 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -17,8 +17,10 @@ else() unset(_zlib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES) endif() if(ZLIB_FOUND AND ANDROID) - if(ZLIB_LIBRARIES MATCHES "/usr/lib.*/libz.so$") + if(ZLIB_LIBRARY MATCHES "/usr/lib.*/libz.so$") + set(ZLIB_LIBRARY z) set(ZLIB_LIBRARIES z) + set(ZLIB_LIBRARY_RELEASE z) endif() endif() endif() diff --git a/cmake/OpenCVPluginStandalone.cmake b/cmake/OpenCVPluginStandalone.cmake index 15b7a8085e..129ede1ae1 100644 --- a/cmake/OpenCVPluginStandalone.cmake +++ b/cmake/OpenCVPluginStandalone.cmake @@ -78,10 +78,17 @@ function(ocv_create_plugin module default_name dependency_target dependency_targ set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES PREFIX "${OPENCV_PLUGIN_MODULE_PREFIX}") endif() - if(APPLE) - set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES 
LINK_FLAGS "-undefined dynamic_lookup") - elseif(WIN32) - # Hack for Windows only, Linux/MacOS uses global symbol table (without exact .so binding) + if(WIN32 OR NOT APPLE) + set(OPENCV_PLUGIN_NO_LINK FALSE CACHE BOOL "") + else() + set(OPENCV_PLUGIN_NO_LINK TRUE CACHE BOOL "") + endif() + + if(OPENCV_PLUGIN_NO_LINK) + if(APPLE) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + endif() + else() find_package(OpenCV REQUIRED ${module} ${OPENCV_PLUGIN_DEPS}) target_link_libraries(${OPENCV_PLUGIN_NAME} PRIVATE ${OpenCV_LIBRARIES}) endif() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 21d60cc0f2..5d49b8a889 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1619,6 +1619,7 @@ function(ocv_add_external_target name inc link def) endif() endfunction() + # Returns the first non-interface target function(ocv_get_imported_target imported interface) set(__result "${interface}") diff --git a/doc/tutorials/dnn/dnn_face/dnn_face.markdown b/doc/tutorials/dnn/dnn_face/dnn_face.markdown index 069de2e919..09180a6a50 100644 --- a/doc/tutorials/dnn/dnn_face/dnn_face.markdown +++ b/doc/tutorials/dnn/dnn_face/dnn_face.markdown @@ -8,19 +8,19 @@ | | | | -: | :- | | Original Author | Chengrui Wang, Yuantao Feng | -| Compatibility | OpenCV >= 4.5.1 | +| Compatibility | OpenCV >= 4.5.4 | ## Introduction -In this section, we introduce the DNN-based module for face detection and face recognition. Models can be obtained in [Models](#Models). The usage of `FaceDetectorYN` and `FaceRecognizerSF` are presented in [Usage](#Usage). +In this section, we introduce cv::FaceDetectorYN class for face detection and cv::FaceRecognizerSF class for face recognition. 
## Models There are two models (ONNX format) pre-trained and required for this module: -- [Face Detection](https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx): - - Size: 337KB +- [Face Detection](https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet): + - Size: 338KB - Results on WIDER Face Val set: 0.830(easy), 0.824(medium), 0.708(hard) -- [Face Recognition](https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view?usp=sharing) +- [Face Recognition](https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface) - Size: 36.9MB - Results: @@ -32,9 +32,7 @@ There are two models (ONNX format) pre-trained and required for this module: | AgeDB-30 | 94.90% | 1.202 | 0.277 | | CFP-FP | 94.80% | 1.253 | 0.212 | -## Usage - -### DNNFaceDetector +## Code @add_toggle_cpp - **Downloadable code**: Click diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 998bcfb392..d71a3a8c82 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -476,9 +476,10 @@ OpenCV have own DNN inference module which have own build-in engine, but can als | `BUILD_PROTOBUF` | _ON_ | Build own copy of _protobuf_. Must be disabled if you want to use external library. | | `PROTOBUF_UPDATE_FILES` | _OFF_ | Re-generate all .proto files. _protoc_ compiler compatible with used version of _protobuf_ must be installed. | | `OPENCV_DNN_OPENCL` | _ON_ | Enable built-in OpenCL inference backend. | -| `WITH_INF_ENGINE` | _OFF_ | Enables [Intel Inference Engine (IE)](https://github.com/openvinotoolkit/openvino) backend. Allows to execute networks in IE format (.xml + .bin). 
Inference Engine must be installed either as part of [OpenVINO toolkit](https://en.wikipedia.org/wiki/OpenVINO), either as a standalone library built from sources. | -| `INF_ENGINE_RELEASE` | _2020040000_ | Defines version of Inference Engine library which is tied to OpenVINO toolkit version. Must be a 10-digit string, e.g. _2020040000_ for OpenVINO 2020.4. | -| `WITH_NGRAPH` | _OFF_ | Enables Intel NGraph library support. This library is part of Inference Engine backend which allows executing arbitrary networks read from files in multiple formats supported by OpenCV: Caffe, TensorFlow, PyTorch, Darknet, etc.. NGraph library must be installed, it is included into Inference Engine. | +| `WITH_INF_ENGINE` | _OFF_ | **Deprecated since OpenVINO 2022.1** Enables [Intel Inference Engine (IE)](https://github.com/openvinotoolkit/openvino) backend. Allows to execute networks in IE format (.xml + .bin). Inference Engine must be installed either as part of [OpenVINO toolkit](https://en.wikipedia.org/wiki/OpenVINO), either as a standalone library built from sources. | +| `INF_ENGINE_RELEASE` | _2020040000_ | **Deprecated since OpenVINO 2022.1** Defines version of Inference Engine library which is tied to OpenVINO toolkit version. Must be a 10-digit string, e.g. _2020040000_ for OpenVINO 2020.4. | +| `WITH_NGRAPH` | _OFF_ | **Deprecated since OpenVINO 2022.1** Enables Intel NGraph library support. This library is part of Inference Engine backend which allows executing arbitrary networks read from files in multiple formats supported by OpenCV: Caffe, TensorFlow, PyTorch, Darknet, etc.. NGraph library must be installed, it is included into Inference Engine. | +| `WITH_OPENVINO` | _OFF_ | Enable Intel OpenVINO Toolkit support. Should be used for OpenVINO>=2022.1 instead of `WITH_INF_ENGINE` and `WITH_NGRAPH`. | | `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. 
| | `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. | | `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). | diff --git a/modules/3d/src/dls.cpp b/modules/3d/src/dls.cpp index cbcd8fea6a..ed2e37f558 100644 --- a/modules/3d/src/dls.cpp +++ b/modules/3d/src/dls.cpp @@ -25,8 +25,7 @@ namespace cv { dls::dls(const Mat& opoints, const Mat& ipoints) { - - N = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); + N = std::max(opoints.checkVector(3, CV_32F), opoints.checkVector(3, CV_64F)); p = Mat(3, N, CV_64F); z = Mat(3, N, CV_64F); mn = Mat::zeros(3, 1, CV_64F); @@ -274,7 +273,7 @@ void dls::build_coeff_matrix(const Mat& pp, Mat& Mtilde, Mat& D) } void dls::compute_eigenvec(const Mat& Mtilde, Mat& eigenval_real, Mat& eigenval_imag, - Mat& eigenvec_real, Mat& eigenvec_imag) + Mat& eigenvec_real, Mat& eigenvec_imag) { #ifdef HAVE_EIGEN Eigen::MatrixXd Mtilde_eig, zeros_eig; @@ -606,8 +605,8 @@ Mat dls::skewsymm(const Mat * X1) { MatConstIterator_ it = X1->begin(); return (Mat_(3,3) << 0, -*(it+2), *(it+1), - *(it+2), 0, -*(it+0), - -*(it+1), *(it+0), 0); + *(it+2), 0, -*(it+0), + -*(it+1), *(it+0), 0); } Mat dls::rotx(const double t) @@ -658,4 +657,4 @@ bool dls::positive_eigenvalues(const Mat * eigenvalues) return *(it) > 0 && *(it+1) > 0 && *(it+2) > 0; } -} +} // namespace cv diff --git a/modules/3d/src/dls.h b/modules/3d/src/dls.h index 8c385709b2..a4b0ffa45c 100644 --- a/modules/3d/src/dls.h +++ b/modules/3d/src/dls.h @@ -9,16 +9,16 @@ namespace cv { class dls { public: - dls(const cv::Mat& opoints, const cv::Mat& ipoints); + dls(const Mat& opoints, const Mat& ipoints); ~dls(); - bool compute_pose(cv::Mat& R, cv::Mat& t); + bool 
compute_pose(Mat& R, Mat& t); private: // initialisation template - void init_points(const cv::Mat& opoints, const cv::Mat& ipoints) + void init_points(const Mat& opoints, const Mat& ipoints) { for(int i = 0; i < N; i++) { @@ -47,33 +47,33 @@ private: } // main algorithm - cv::Mat LeftMultVec(const cv::Mat& v); - void run_kernel(const cv::Mat& pp); - void build_coeff_matrix(const cv::Mat& pp, cv::Mat& Mtilde, cv::Mat& D); - void compute_eigenvec(const cv::Mat& Mtilde, cv::Mat& eigenval_real, cv::Mat& eigenval_imag, - cv::Mat& eigenvec_real, cv::Mat& eigenvec_imag); - void fill_coeff(const cv::Mat * D); + Mat LeftMultVec(const Mat& v); + void run_kernel(const Mat& pp); + void build_coeff_matrix(const Mat& pp, Mat& Mtilde, Mat& D); + void compute_eigenvec(const Mat& Mtilde, Mat& eigenval_real, Mat& eigenval_imag, + Mat& eigenvec_real, Mat& eigenvec_imag); + void fill_coeff(const Mat * D); // useful functions - cv::Mat cayley_LS_M(const std::vector& a, const std::vector& b, - const std::vector& c, const std::vector& u); - cv::Mat Hessian(const double s[]); - cv::Mat cayley2rotbar(const cv::Mat& s); - cv::Mat skewsymm(const cv::Mat * X1); + Mat cayley_LS_M(const std::vector& a, const std::vector& b, + const std::vector& c, const std::vector& u); + Mat Hessian(const double s[]); + Mat cayley2rotbar(const Mat& s); + Mat skewsymm(const Mat * X1); // extra functions - cv::Mat rotx(const double t); - cv::Mat roty(const double t); - cv::Mat rotz(const double t); - cv::Mat mean(const cv::Mat& M); - bool is_empty(const cv::Mat * v); - bool positive_eigenvalues(const cv::Mat * eigenvalues); + Mat rotx(const double t); + Mat roty(const double t); + Mat rotz(const double t); + Mat mean(const Mat& M); + bool is_empty(const Mat * v); + bool positive_eigenvalues(const Mat * eigenvalues); - cv::Mat p, z, mn; // object-image points + Mat p, z, mn; // object-image points int N; // number of input points std::vector f1coeff, f2coeff, f3coeff, cost_; // coefficient for coefficients 
matrix - std::vector C_est_, t_est_; // optimal candidates - cv::Mat C_est__, t_est__; // optimal found solution + std::vector C_est_, t_est_; // optimal candidates + Mat C_est__, t_est__; // optimal found solution double cost__; // optimal found solution }; @@ -736,7 +736,7 @@ public: { /*if(isSymmetric(src)) { // Fall back to OpenCV for a symmetric matrix! - cv::eigen(src, _eigenvalues, _eigenvectors); + eigen(src, _eigenvalues, _eigenvectors); } else {*/ Mat tmp; // Convert the given input matrix to double. Is there any way to @@ -768,6 +768,5 @@ public: Mat eigenvectors() { return _eigenvectors; } }; -} - +} // namespace cv #endif // DLS_H diff --git a/modules/3d/src/solvepnp.cpp b/modules/3d/src/solvepnp.cpp index 6cee6a7485..bd55fea5e4 100644 --- a/modules/3d/src/solvepnp.cpp +++ b/modules/3d/src/solvepnp.cpp @@ -103,12 +103,12 @@ void drawFrameAxes(InputOutputArray image, InputArray cameraMatrix, InputArray d CV_Assert(length > 0); // project axes points - vector axesPoints; + std::vector axesPoints; axesPoints.push_back(Point3f(0, 0, 0)); axesPoints.push_back(Point3f(length, 0, 0)); axesPoints.push_back(Point3f(0, length, 0)); axesPoints.push_back(Point3f(0, 0, length)); - vector imagePoints; + std::vector imagePoints; projectPoints(axesPoints, rvec, tvec, cameraMatrix, distCoeffs, imagePoints); // draw axes lines @@ -123,7 +123,7 @@ bool solvePnP( InputArray opoints, InputArray ipoints, { CV_INSTRUMENT_REGION(); - vector rvecs, tvecs; + std::vector rvecs, tvecs; int solutions = solvePnPGeneric(opoints, ipoints, cameraMatrix, distCoeffs, rvecs, tvecs, useExtrinsicGuess, (SolvePnPMethod)flags, rvec, tvec); if (solutions > 0) @@ -321,8 +321,8 @@ bool solvePnPRansac(InputArray _opoints, InputArray _ipoints, return false; } - vector opoints_inliers; - vector ipoints_inliers; + std::vector opoints_inliers; + std::vector ipoints_inliers; opoints = opoints.reshape(3); ipoints = ipoints.reshape(2); opoints.convertTo(opoints_inliers, CV_64F); @@ -472,7 +472,7 @@ int 
solveP3P( InputArray _opoints, InputArray _ipoints, else imgPts = imgPts.reshape(1, 2*imgPts.rows); - vector reproj_errors(solutions); + std::vector reproj_errors(solutions); for (size_t i = 0; i < reproj_errors.size(); i++) { Mat rvec; @@ -753,7 +753,7 @@ static void solvePnPRefine(InputArray _objectPoints, InputArray _imagePoints, rvec0.convertTo(rvec, CV_64F); tvec0.convertTo(tvec, CV_64F); - vector ipoints_normalized; + std::vector ipoints_normalized; undistortPoints(ipoints, ipoints_normalized, cameraMatrix, distCoeffs); Mat sd = Mat(ipoints_normalized).reshape(1, npoints*2); Mat objectPoints0 = opoints.reshape(1, npoints); @@ -847,7 +847,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, Mat cameraMatrix = Mat_(cameraMatrix0); Mat distCoeffs = Mat_(distCoeffs0); - vector vec_rvecs, vec_tvecs; + std::vector vec_rvecs, vec_tvecs; if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP) { if (flags == SOLVEPNP_DLS) @@ -872,7 +872,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, } else if (flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P) { - vector rvecs, tvecs; + std::vector rvecs, tvecs; solveP3P(opoints, ipoints, _cameraMatrix, _distCoeffs, rvecs, tvecs, flags); vec_rvecs.insert(vec_rvecs.end(), rvecs.begin(), rvecs.end()); vec_tvecs.insert(vec_tvecs.end(), tvecs.begin(), tvecs.end()); @@ -1120,7 +1120,7 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints, for (size_t i = 0; i < vec_rvecs.size(); i++) { - vector projectedPoints; + std::vector projectedPoints; projectPoints(objectPoints, vec_rvecs[i], vec_tvecs[i], cameraMatrix, distCoeffs, projectedPoints); double rmse = norm(Mat(projectedPoints, false), imagePoints, NORM_L2) / sqrt(2*projectedPoints.size()); diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index 22a86ff9be..7a50390aed 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ 
b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -219,6 +219,12 @@ AsyncArray testAsyncException() return p.getArrayResult(); } +namespace nested { +CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { + return flag; +} +} // namespace nested + namespace fs { CV_EXPORTS_W cv::String getCacheDirectoryForDownloads(); } // namespace fs diff --git a/modules/core/include/opencv2/core/core_c.h b/modules/core/include/opencv2/core/core_c.h index b37297e146..bd9b516003 100644 --- a/modules/core/include/opencv2/core/core_c.h +++ b/modules/core/include/opencv2/core/core_c.h @@ -48,16 +48,19 @@ #include "opencv2/core/types_c.h" #ifdef __cplusplus -# ifdef _MSC_VER -/* disable warning C4190: 'function' has C-linkage specified, but returns UDT 'typename' - which is incompatible with C +/* disable MSVC warning C4190 / clang-cl -Wreturn-type-c-linkage: + 'function' has C-linkage specified, but returns UDT 'typename' + which is incompatible with C It is OK to disable it because we only extend few plain structures with C++ constructors for simpler interoperability with C++ API of the library */ -# pragma warning(disable:4190) -# elif defined __clang__ && __clang_major__ >= 3 +# if defined(__clang__) + // handle clang on Linux and clang-cl (i. e. clang on Windows) first # pragma GCC diagnostic ignored "-Wreturn-type-c-linkage" +# elif defined(_MSC_VER) + // then handle MSVC +# pragma warning(disable:4190) # endif #endif diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 716b8bf2a8..719003f21f 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -924,7 +924,7 @@ public: INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */ }; - CV_WRAP explicit Event(Event::CreateFlags flags = Event::CreateFlags::DEFAULT); + CV_WRAP explicit Event(const Event::CreateFlags flags = Event::CreateFlags::DEFAULT); //! 
records an event CV_WRAP void record(Stream& stream = Stream::Null()); @@ -946,6 +946,7 @@ private: friend struct EventAccessor; }; +CV_ENUM_FLAGS(Event::CreateFlags) //! @} cudacore_struct diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index e39f229f6e..87a51748bb 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -444,7 +444,16 @@ CV_EXPORTS InputOutputArray noArray(); /////////////////////////////////// MatAllocator ////////////////////////////////////// -//! Usage flags for allocator +/** @brief Usage flags for allocator + + @warning All flags except `USAGE_DEFAULT` are experimental. + + @warning For the OpenCL allocator, `USAGE_ALLOCATE_SHARED_MEMORY` depends on + OpenCV's optional, experimental integration with OpenCL SVM. To enable this + integration, build OpenCV using the `WITH_OPENCL_SVM=ON` CMake option and, at + runtime, call `cv::ocl::Context::getDefault().setUseSVM(true);` or similar + code. Note that SVM is incompatible with OpenCL 1.x. 
+*/ enum UMatUsageFlags { USAGE_DEFAULT = 0, @@ -2076,7 +2085,7 @@ public: Mat_ image = Mat::zeros(3, sizes, CV_8UC3); - image.forEach([&](Pixel& pixel, const int position[]) -> void { + image.forEach([](Pixel& pixel, const int position[]) -> void { pixel.x = position[0]; pixel.y = position[1]; pixel.z = position[2]; diff --git a/modules/core/include/opencv2/core/persistence.hpp b/modules/core/include/opencv2/core/persistence.hpp index 276f640323..8e135d1a11 100644 --- a/modules/core/include/opencv2/core/persistence.hpp +++ b/modules/core/include/opencv2/core/persistence.hpp @@ -309,8 +309,8 @@ public: READ = 0, //!< value, open the file for reading WRITE = 1, //!< value, open the file for writing APPEND = 2, //!< value, open the file for appending - MEMORY = 4, //!< flag, read data from source or write data to the internal buffer (which is - //!< returned by FileStorage::release) + MEMORY = 4, /**< flag, read data from source or write data to the internal buffer (which is + returned by FileStorage::release) */ FORMAT_MASK = (7<<3), //!< mask for format flags FORMAT_AUTO = 0, //!< flag, auto format FORMAT_XML = (1<<3), //!< flag, XML format diff --git a/modules/core/include/opencv2/core/utils/fp_control.private.hpp b/modules/core/include/opencv2/core/utils/fp_control.private.hpp new file mode 100644 index 0000000000..12ee363dd8 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/fp_control.private.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP +#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP + +#include "fp_control_utils.hpp" + +#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0 + // disabled +#elif defined(OPENCV_IMPL_FP_HINTS) + // custom +#elif defined(OPENCV_IMPL_FP_HINTS_X86) + // custom +#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + #include + #define OPENCV_IMPL_FP_HINTS_X86 1 + #define OPENCV_IMPL_FP_HINTS 1 +#endif + +#ifndef OPENCV_IMPL_FP_HINTS +#define OPENCV_IMPL_FP_HINTS 0 +#endif +#ifndef OPENCV_IMPL_FP_HINTS_X86 +#define OPENCV_IMPL_FP_HINTS_X86 0 +#endif + +#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP diff --git a/modules/core/include/opencv2/core/utils/fp_control_utils.hpp b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp new file mode 100644 index 0000000000..930bc5d367 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/fp_control_utils.hpp @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP +#define OPENCV_CORE_FP_CONTROL_UTILS_HPP + +namespace cv { + +namespace details { + +struct FPDenormalsModeState +{ + uint32_t reserved[16]; // 64-bytes +}; // FPDenormalsModeState + +CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state); +CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state); +CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state); + +class FPDenormalsIgnoreHintScope +{ +public: + inline explicit FPDenormalsIgnoreHintScope(bool ignore = true) + { + details::setFPDenormalsIgnoreHint(ignore, saved_state); + } + + inline explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state) + { + details::saveFPDenormalsState(saved_state); + details::restoreFPDenormalsState(state); + } + + inline ~FPDenormalsIgnoreHintScope() + { + details::restoreFPDenormalsState(saved_state); + } + +protected: + FPDenormalsModeState saved_state; +}; // FPDenormalsIgnoreHintScope + +class FPDenormalsIgnoreHintScopeNOOP +{ +public: + inline FPDenormalsIgnoreHintScopeNOOP(bool ignore = true) { CV_UNUSED(ignore); } + inline FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state) { CV_UNUSED(state); } + inline ~FPDenormalsIgnoreHintScopeNOOP() { } +}; // FPDenormalsIgnoreHintScopeNOOP + +} // namespace details + + +// Should depend on target compilation architecture only +// Note: previously added archs should NOT be removed to preserve ABI compatibility +#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT) + // preserve configuration overloading through ports +#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_) +typedef details::FPDenormalsIgnoreHintScope FPDenormalsIgnoreHintScope; +#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1 +#else +#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0 +typedef details::FPDenormalsIgnoreHintScopeNOOP FPDenormalsIgnoreHintScope; +#endif + +} // namespace cv + +#endif // 
OPENCV_CORE_FP_CONTROL_UTILS_HPP diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index 68863ffb36..5cbc066784 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -684,7 +684,8 @@ VSX_IMPL_LOAD_L8(vec_double2, double) #endif // absolute difference -#ifndef vec_absd +#ifndef _ARCH_PWR9 +# undef vec_absd # define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b)) #endif diff --git a/modules/core/perf/perf_reduce.cpp b/modules/core/perf/perf_reduce.cpp index 8f9c2e8349..dcc0205fdc 100644 --- a/modules/core/perf/perf_reduce.cpp +++ b/modules/core/perf/perf_reduce.cpp @@ -23,7 +23,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceR, int reduceOp = get<2>(GetParam()); int ddepth = -1; - if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG) ) + if( CV_MAT_DEPTH(matType) < CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) ) ddepth = CV_32S; Mat src(sz, matType); @@ -51,7 +51,7 @@ PERF_TEST_P(Size_MatType_ROp, reduceC, int reduceOp = get<2>(GetParam()); int ddepth = -1; - if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG) ) + if( CV_MAT_DEPTH(matType)< CV_32S && (reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG) ) ddepth = CV_32S; Mat src(sz, matType); diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu index f31f78a87a..c286f28eb0 100644 --- a/modules/core/src/cuda/gpu_mat.cu +++ b/modules/core/src/cuda/gpu_mat.cu @@ -184,11 +184,8 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type) if (esz * cols == step) flags |= Mat::CONTINUOUS_FLAG; - int64 _nettosize = static_cast(step) * rows; - size_t nettosize = static_cast(_nettosize); - datastart = data; - dataend = data + nettosize; + dataend = data + step * (rows - 1) + cols * esz; if (refcount) *refcount = 1; diff --git a/modules/core/src/cuda_stream.cpp 
b/modules/core/src/cuda_stream.cpp index 3680e0720a..3f647c8d55 100644 --- a/modules/core/src/cuda_stream.cpp +++ b/modules/core/src/cuda_stream.cpp @@ -811,7 +811,7 @@ Event cv::cuda::EventAccessor::wrapEvent(cudaEvent_t event) #endif -cv::cuda::Event::Event(CreateFlags flags) +cv::cuda::Event::Event(const Event::CreateFlags flags) { #ifndef HAVE_CUDA CV_UNUSED(flags); diff --git a/modules/core/src/hal_internal.cpp b/modules/core/src/hal_internal.cpp index 44540e7914..2ba992111c 100644 --- a/modules/core/src/hal_internal.cpp +++ b/modules/core/src/hal_internal.cpp @@ -64,6 +64,16 @@ #define HAL_LU_SMALL_MATRIX_THRESH 100 #define HAL_CHOLESKY_SMALL_MATRIX_THRESH 100 +#if defined(__clang__) && defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define CV_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ +__msan_unpoison(address, size) +#endif +#endif +#ifndef CV_ANNOTATE_MEMORY_IS_INITIALIZED +#define CV_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) do { } while(0) +#endif + //lapack stores matrices in column-major order so transposing is needed everywhere template static inline void transpose_square_inplace(fptype *src, size_t src_ld, size_t m) @@ -248,6 +258,17 @@ lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype OCV_LAPACK_FUNC(dgesdd)(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)&buffer[0], &lwork, &iworkBuf[0], info); + // Make sure MSAN sees the memory as having been written. + // MSAN does not think it has been written because a different language was called. 
+ CV_ANNOTATE_MEMORY_IS_INITIALIZED(a, a_step * n); + CV_ANNOTATE_MEMORY_IS_INITIALIZED(buffer, sizeof(fptype) * (lwork + 1)); + if (u) + CV_ANNOTATE_MEMORY_IS_INITIALIZED(u, u_step * m); + if (vt) + CV_ANNOTATE_MEMORY_IS_INITIALIZED(vt, v_step * n); + if (w) + CV_ANNOTATE_MEMORY_IS_INITIALIZED(w, sizeof(fptype) * std::min(m, n)); + if(!(flags & CV_HAL_SVD_NO_UV)) transpose_square_inplace(vt, ldv, n); @@ -359,6 +380,7 @@ lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_ste dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, info); } + CV_ANNOTATE_MEMORY_IS_INITIALIZED(info, sizeof(int)); if (m == n) transpose_square_inplace(a, lda, m); else diff --git a/modules/core/src/kmeans.cpp b/modules/core/src/kmeans.cpp index 3e72ddf6a4..c7a03c814b 100644 --- a/modules/core/src/kmeans.cpp +++ b/modules/core/src/kmeans.cpp @@ -240,7 +240,7 @@ double cv::kmeans( InputArray _data, int K, attempts = std::max(attempts, 1); CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); - CV_CheckGE(N, K, "Number of clusters should be more than number of elements"); + CV_CheckGE(N, K, "There can't be more clusters than elements"); Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast(data0.step)); diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp index 92e44f45c9..52200f097c 100644 --- a/modules/core/src/matmul.dispatch.cpp +++ b/modules/core/src/matmul.dispatch.cpp @@ -804,7 +804,7 @@ void calcCovarMatrix( InputArray _src, OutputArray _covar, InputOutputArray _mea else { ctype = std::max(CV_MAT_DEPTH(ctype >= 0 ? ctype : type), CV_32F); - reduce( _src, _mean, takeRows ? 0 : 1, CV_REDUCE_AVG, ctype ); + reduce( _src, _mean, takeRows ? 
0 : 1, REDUCE_AVG, ctype ); mean = _mean.getMat(); } diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 1729862cb7..6a381c15a0 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -176,27 +176,23 @@ public: } }; -namespace +static +MatAllocator*& getDefaultAllocatorMatRef() { - MatAllocator* volatile g_matAllocator = NULL; + static MatAllocator* g_matAllocator = Mat::getStdAllocator(); + return g_matAllocator; } MatAllocator* Mat::getDefaultAllocator() { - if (g_matAllocator == NULL) - { - cv::AutoLock lock(cv::getInitializationMutex()); - if (g_matAllocator == NULL) - { - g_matAllocator = getStdAllocator(); - } - } - return g_matAllocator; + return getDefaultAllocatorMatRef(); } + void Mat::setDefaultAllocator(MatAllocator* allocator) { - g_matAllocator = allocator; + getDefaultAllocatorMatRef() = allocator; } + MatAllocator* Mat::getStdAllocator() { CV_SINGLETON_LAZY_INIT(MatAllocator, new StdMatAllocator()) @@ -269,7 +265,7 @@ void setSize( Mat& m, int _dims, const int* _sz, const size_t* _steps, bool auto else if( autoSteps ) { m.step.p[i] = total; - int64 total1 = (int64)total*s; + uint64 total1 = (uint64)total*s; if( (uint64)total1 != (size_t)total1 ) CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" ); total = (size_t)total1; diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp index 62e92fd5d3..f9a50cd0ee 100644 --- a/modules/core/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -616,7 +616,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst, if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; - if (op == CV_REDUCE_AVG) + if (op == REDUCE_AVG) { if (sdepth < CV_32S && ddepth < CV_32S) ddepth = CV_32S; @@ -654,7 +654,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst, _dst.create(dsize, dtype); UMat dst = _dst.getUMat(); - if (op0 == CV_REDUCE_AVG) + if (op0 == 
REDUCE_AVG) k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(dst), 1.0f / src.cols); else @@ -690,7 +690,7 @@ static bool ocl_reduce(InputArray _src, OutputArray _dst, ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), temparg = ocl::KernelArg::WriteOnlyNoSize(dst); - if (op0 == CV_REDUCE_AVG) + if (op0 == REDUCE_AVG) k.args(srcarg, temparg, 1.0f / (dim == 0 ? src.rows : src.cols)); else k.args(srcarg, temparg); @@ -717,8 +717,8 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) int ddepth = CV_MAT_DEPTH(dtype); CV_Assert( cn == CV_MAT_CN(dtype) ); - CV_Assert( op == CV_REDUCE_SUM || op == CV_REDUCE_MAX || - op == CV_REDUCE_MIN || op == CV_REDUCE_AVG ); + CV_Assert( op == REDUCE_SUM || op == REDUCE_MAX || + op == REDUCE_MIN || op == REDUCE_AVG ); CV_OCL_RUN(_dst.isUMat(), ocl_reduce(_src, _dst, dim, op, op0, stype, dtype)) @@ -732,9 +732,9 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) _dst.create(dim == 0 ? 1 : src.rows, dim == 0 ? 
src.cols : 1, dtype); Mat dst = _dst.getMat(), temp = dst; - if( op == CV_REDUCE_AVG ) + if( op == REDUCE_AVG ) { - op = CV_REDUCE_SUM; + op = REDUCE_SUM; if( sdepth < CV_32S && ddepth < CV_32S ) { temp.create(dst.rows, dst.cols, CV_32SC(cn)); @@ -745,7 +745,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) ReduceFunc func = 0; if( dim == 0 ) { - if( op == CV_REDUCE_SUM ) + if( op == REDUCE_SUM ) { if(sdepth == CV_8U && ddepth == CV_32S) func = GET_OPTIMIZED(reduceSumR8u32s); @@ -768,7 +768,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) else if(sdepth == CV_64F && ddepth == CV_64F) func = reduceSumR64f64f; } - else if(op == CV_REDUCE_MAX) + else if(op == REDUCE_MAX) { if(sdepth == CV_8U && ddepth == CV_8U) func = GET_OPTIMIZED(reduceMaxR8u); @@ -781,7 +781,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) else if(sdepth == CV_64F && ddepth == CV_64F) func = reduceMaxR64f; } - else if(op == CV_REDUCE_MIN) + else if(op == REDUCE_MIN) { if(sdepth == CV_8U && ddepth == CV_8U) func = GET_OPTIMIZED(reduceMinR8u); @@ -797,7 +797,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) } else { - if(op == CV_REDUCE_SUM) + if(op == REDUCE_SUM) { if(sdepth == CV_8U && ddepth == CV_32S) func = GET_OPTIMIZED(reduceSumC8u32s); @@ -820,7 +820,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) else if(sdepth == CV_64F && ddepth == CV_64F) func = reduceSumC64f64f; } - else if(op == CV_REDUCE_MAX) + else if(op == REDUCE_MAX) { if(sdepth == CV_8U && ddepth == CV_8U) func = GET_OPTIMIZED(reduceMaxC8u); @@ -833,7 +833,7 @@ void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) else if(sdepth == CV_64F && ddepth == CV_64F) func = reduceMaxC64f; } - else if(op == CV_REDUCE_MIN) + else if(op == REDUCE_MIN) { if(sdepth == CV_8U && ddepth == CV_8U) func = GET_OPTIMIZED(reduceMinC8u); @@ -854,7 +854,7 @@ 
void cv::reduce(InputArray _src, OutputArray _dst, int dim, int op, int dtype) func( src, temp ); - if( op0 == CV_REDUCE_AVG ) + if( op0 == REDUCE_AVG ) temp.convertTo(dst, dst.type(), 1./(dim == 0 ? src.rows : src.cols)); } @@ -940,8 +940,8 @@ static bool ipp_sort(const Mat& src, Mat& dst, int flags) { CV_INSTRUMENT_REGION_IPP(); - bool sortRows = (flags & 1) == CV_SORT_EVERY_ROW; - bool sortDescending = (flags & CV_SORT_DESCENDING) != 0; + bool sortRows = (flags & 1) == SORT_EVERY_ROW; + bool sortDescending = (flags & SORT_DESCENDING) != 0; bool inplace = (src.data == dst.data); int depth = src.depth(); IppDataType type = ippiGetDataType(depth); diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 684a1a2ee3..2c66a120aa 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -153,6 +153,9 @@ #include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available +#include +#include + using namespace cv; namespace cv { @@ -203,6 +206,9 @@ namespace { // propagate main thread state rng = cv::theRNG(); +#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS + details::saveFPDenormalsState(fp_denormals_base_state); +#endif #ifdef OPENCV_TRACE traceRootRegion = CV_TRACE_NS::details::getCurrentRegion(); @@ -283,6 +289,11 @@ namespace { } } } + +#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS + details::FPDenormalsModeState fp_denormals_base_state; +#endif + private: ParallelLoopBodyWrapperContext(const ParallelLoopBodyWrapperContext&); // disabled ParallelLoopBodyWrapperContext& operator=(const ParallelLoopBodyWrapperContext&); // disabled @@ -319,6 +330,9 @@ namespace { // propagate main thread state cv::theRNG() = ctx.rng; +#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS + FPDenormalsIgnoreHintScope fp_denormals_scope(ctx.fp_denormals_base_state); +#endif cv::Range r; cv::Range wholeRange = ctx.wholeRange; diff --git 
a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index ae6a5a04fe..6789c78e9d 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace cv { @@ -499,21 +501,29 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char if (!isGZ) { file = fopen(filename.c_str(), !write_mode ? "rt" : !append ? "wt" : "a+t"); if (!file) + { + CV_LOG_ERROR(NULL, "Can't open file: '" << filename << "' in " << (!write_mode ? "read" : !append ? "write" : "append") << " mode"); return false; + } } else { #if USE_ZLIB char mode[] = {write_mode ? 'w' : 'r', 'b', compression ? compression : '3', '\0'}; gzfile = gzopen(filename.c_str(), mode); if (!gzfile) + { + CV_LOG_ERROR(NULL, "Can't open archive: '" << filename << "' mode=" << mode); return false; + } #else CV_Error(cv::Error::StsNotImplemented, "There is no compressed file storage support in this configuration"); #endif } } + // FIXIT release() must do that, use CV_Assert() here instead roots.clear(); fs_data.clear(); + wrap_margin = 71; fmt = FileStorage::FORMAT_AUTO; @@ -616,14 +626,14 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char puts("\n"); } - emitter = createXMLEmitter(this); + emitter_do_not_use_direct_dereference = createXMLEmitter(this); } else if (fmt == FileStorage::FORMAT_YAML) { if (!append) puts("%YAML:1.0\n---\n"); else puts("...\n---\n"); - emitter = createYAMLEmitter(this); + emitter_do_not_use_direct_dereference = createYAMLEmitter(this); } else { CV_Assert(fmt == FileStorage::FORMAT_JSON); if (!append) @@ -653,7 +663,7 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char } } write_stack.back().indent = 4; - emitter = createJSONEmitter(this); + emitter_do_not_use_direct_dereference = createJSONEmitter(this); } is_opened = true; } else { @@ -701,20 +711,20 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int 
_flags, const char switch (fmt) { case FileStorage::FORMAT_XML: - parser = createXMLParser(this); + parser_do_not_use_direct_dereference = createXMLParser(this); break; case FileStorage::FORMAT_YAML: - parser = createYAMLParser(this); + parser_do_not_use_direct_dereference = createYAMLParser(this); break; case FileStorage::FORMAT_JSON: - parser = createJSONParser(this); + parser_do_not_use_direct_dereference = createJSONParser(this); break; default: - parser = Ptr(); + parser_do_not_use_direct_dereference = Ptr(); } - if (!parser.empty()) { - ok = parser->parse(ptr); + if (!parser_do_not_use_direct_dereference.empty()) { + ok = getParser().parse(ptr); if (ok) { finalizeCollection(root_nodes); @@ -728,7 +738,9 @@ bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char } } } - catch (...) { + catch (...) + { + // FIXIT log error message is_opened = true; release(); throw; @@ -926,7 +938,7 @@ void FileStorage::Impl::endWriteStruct() { if (fmt == FileStorage::FORMAT_JSON && !FileNode::isFlow(current_struct.flags) && write_stack.size() > 1) current_struct.indent = write_stack[write_stack.size() - 2].indent; - emitter->endWriteStruct(current_struct); + getEmitter().endWriteStruct(current_struct); write_stack.pop_back(); if (!write_stack.empty()) @@ -945,7 +957,7 @@ void FileStorage::Impl::startWriteStruct_helper(const char *key, int struct_flag if (type_name && type_name[0] == '\0') type_name = 0; - FStructData s = emitter->startWriteStruct(write_stack.back(), key, struct_flags, type_name); + FStructData s = getEmitter().startWriteStruct(write_stack.back(), key, struct_flags, type_name); write_stack.push_back(s); size_t write_stack_size = write_stack.size(); @@ -956,7 +968,7 @@ void FileStorage::Impl::startWriteStruct_helper(const char *key, int struct_flag flush(); if (fmt == FileStorage::FORMAT_JSON && type_name && type_name[0] && FileNode::isMap(struct_flags)) { - emitter->write("type_id", type_name, false); + getEmitter().write("type_id", 
type_name, false); } } @@ -997,7 +1009,7 @@ void FileStorage::Impl::startWriteStruct(const char *key, int struct_flags, void FileStorage::Impl::writeComment(const char *comment, bool eol_comment) { CV_Assert(write_mode); - emitter->writeComment(comment, eol_comment); + getEmitter().writeComment(comment, eol_comment); } void FileStorage::Impl::startNextStream() { @@ -1006,7 +1018,7 @@ void FileStorage::Impl::startNextStream() { while (!write_stack.empty()) endWriteStruct(); flush(); - emitter->startNextStream(); + getEmitter().startNextStream(); empty_stream = true; write_stack.push_back(FStructData("", FileNode::EMPTY, 0)); bufofs = 0; @@ -1015,17 +1027,17 @@ void FileStorage::Impl::startNextStream() { void FileStorage::Impl::write(const String &key, int value) { CV_Assert(write_mode); - emitter->write(key.c_str(), value); + getEmitter().write(key.c_str(), value); } void FileStorage::Impl::write(const String &key, double value) { CV_Assert(write_mode); - emitter->write(key.c_str(), value); + getEmitter().write(key.c_str(), value); } void FileStorage::Impl::write(const String &key, const String &value) { CV_Assert(write_mode); - emitter->write(key.c_str(), value.c_str(), false); + getEmitter().write(key.c_str(), value.c_str(), false); } void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, size_t len) { @@ -1111,7 +1123,7 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s return; } - emitter->writeScalar(0, ptr); + getEmitter().writeScalar(0, ptr); } offset = (int) (data - data0); @@ -1597,8 +1609,8 @@ FileStorage::Impl::Base64Decoder::Base64Decoder() { eos = true; } -void FileStorage::Impl::Base64Decoder::init(Ptr &_parser, char *_ptr, int _indent) { - parser = _parser; +void FileStorage::Impl::Base64Decoder::init(const Ptr &_parser, char *_ptr, int _indent) { + parser_do_not_use_direct_dereference = _parser; ptr = _ptr; indent = _indent; encoded.clear(); @@ -1641,9 +1653,9 @@ bool 
FileStorage::Impl::Base64Decoder::readMore(int needed) { decoded.resize(sz); ofs = 0; - CV_Assert(!parser.empty() && ptr); + CV_Assert(ptr); char *beg = 0, *end = 0; - bool ok = parser->getBase64Row(ptr, indent, beg, end); + bool ok = getParser().getBase64Row(ptr, indent, beg, end); ptr = end; std::copy(beg, end, std::back_inserter(encoded)); totalchars += end - beg; @@ -1730,7 +1742,7 @@ char *FileStorage::Impl::Base64Decoder::getPtr() const { return ptr; } char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection) { const int BASE64_HDR_SIZE = 24; char dt[BASE64_HDR_SIZE + 1] = {0}; - base64decoder.init(parser, ptr, indent); + base64decoder.init(parser_do_not_use_direct_dereference, ptr, indent); int i, k; diff --git a/modules/core/src/persistence_impl.hpp b/modules/core/src/persistence_impl.hpp index 4ea2dc3502..1c261ce772 100644 --- a/modules/core/src/persistence_impl.hpp +++ b/modules/core/src/persistence_impl.hpp @@ -139,7 +139,7 @@ public: { public: Base64Decoder(); - void init(Ptr& _parser, char* _ptr, int _indent); + void init(const Ptr& _parser, char* _ptr, int _indent); bool readMore(int needed); @@ -155,7 +155,13 @@ public: char* getPtr() const; protected: - Ptr parser; + Ptr parser_do_not_use_direct_dereference; + FileStorageParser& getParser() const + { + if (!parser_do_not_use_direct_dereference) + CV_Error(Error::StsNullPtr, "Parser is not available"); + return *parser_do_not_use_direct_dereference; + } char* ptr; int indent; std::vector encoded; @@ -205,8 +211,20 @@ public: std::deque outbuf; - Ptr emitter; - Ptr parser; + Ptr emitter_do_not_use_direct_dereference; + FileStorageEmitter& getEmitter() + { + if (!emitter_do_not_use_direct_dereference) + CV_Error(Error::StsNullPtr, "Emitter is not available"); + return *emitter_do_not_use_direct_dereference; + } + Ptr parser_do_not_use_direct_dereference; + FileStorageParser& getParser() const + { + if (!parser_do_not_use_direct_dereference) + CV_Error(Error::StsNullPtr, "Parser 
is not available"); + return *parser_do_not_use_direct_dereference; + } Base64Decoder base64decoder; base64::Base64Writer* base64_writer; @@ -228,4 +246,4 @@ public: } -#endif \ No newline at end of file +#endif diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 5682c3d008..84f0039dc9 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -55,6 +55,9 @@ #include +#include +#include + #ifndef OPENCV_WITH_THREAD_SANITIZER #if defined(__clang__) && defined(__has_feature) #if __has_feature(thread_sanitizer) @@ -630,7 +633,7 @@ struct HWFeatures } } #elif (defined __ppc64__ || defined __PPC64__) && defined __FreeBSD__ - unsigned int hwcap = 0; + unsigned long hwcap = 0; elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)); if (hwcap & PPC_FEATURE_HAS_VSX) { elf_aux_info(AT_HWCAP2, &hwcap, sizeof(hwcap)); @@ -2720,6 +2723,82 @@ void setUseIPP_NotExact(bool flag) } // namespace ipp + +namespace details { + +#if OPENCV_IMPL_FP_HINTS_X86 +#ifndef _MM_DENORMALS_ZERO_ON // requires pmmintrin.h (SSE3) +#define _MM_DENORMALS_ZERO_ON 0x0040 +#endif +#ifndef _MM_DENORMALS_ZERO_MASK // requires pmmintrin.h (SSE3) +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif +#endif + +void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state) +{ +#if OPENCV_IMPL_FP_HINTS_X86 + unsigned mask = _MM_FLUSH_ZERO_MASK; + unsigned value = ignore ? _MM_FLUSH_ZERO_ON : 0; + if (featuresEnabled.have[CPU_SSE3]) + { + mask |= _MM_DENORMALS_ZERO_MASK; + value |= ignore ? 
_MM_DENORMALS_ZERO_ON : 0; + } + const unsigned old_flags = _mm_getcsr(); + const unsigned old_value = old_flags & mask; + unsigned flags = (old_flags & ~mask) | value; + CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", flags)); + // save state + state.reserved[0] = (uint32_t)mask; + state.reserved[1] = (uint32_t)old_value; + _mm_setcsr(flags); +#else + CV_UNUSED(ignore); CV_UNUSED(state); +#endif +} + +int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state) +{ +#if OPENCV_IMPL_FP_HINTS_X86 + unsigned mask = _MM_FLUSH_ZERO_MASK; + if (featuresEnabled.have[CPU_SSE3]) + { + mask |= _MM_DENORMALS_ZERO_MASK; + } + const unsigned old_flags = _mm_getcsr(); + const unsigned old_value = old_flags & mask; + // save state + state.reserved[0] = (uint32_t)mask; + state.reserved[1] = (uint32_t)old_value; + return 2; +#else + CV_UNUSED(state); + return 0; +#endif +} + +bool restoreFPDenormalsState(const FPDenormalsModeState& state) +{ +#if OPENCV_IMPL_FP_HINTS_X86 + const unsigned mask = (unsigned)state.reserved[0]; + CV_DbgAssert(mask != 0); // invalid state (ensure that state is properly saved earlier) + const unsigned value = (unsigned)state.reserved[1]; + CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state"); + const unsigned old_flags = _mm_getcsr(); + unsigned flags = (old_flags & ~mask) | value; + CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", flags)); + _mm_setcsr(flags); + return true; +#else + CV_UNUSED(state); + return false; +#endif +} + +} // namespace details + + } // namespace cv /* End of file. 
*/ diff --git a/modules/core/src/va_intel.cpp b/modules/core/src/va_intel.cpp index acc502d0f5..cf974298e5 100644 --- a/modules/core/src/va_intel.cpp +++ b/modules/core/src/va_intel.cpp @@ -606,10 +606,36 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, if (status != VA_STATUS_SUCCESS) CV_Error(cv::Error::StsError, "VA-API: vaSyncSurface failed"); + bool indirect_buffer = false; VAImage image; status = vaDeriveImage(display, surface, &image); - if (status != VA_STATUS_SUCCESS) - CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed"); + if (status != VA_STATUS_SUCCESS){ + //try vaCreateImage + vaPutImage + //pick a format + indirect_buffer = true; + int num_formats = vaMaxNumImageFormats(display); + if (num_formats <= 0) + CV_Error(cv::Error::StsError, "VA-API: vaMaxNumImageFormats failed"); + std::vector<VAImageFormat> fmt_list(num_formats); + + status = vaQueryImageFormats(display, fmt_list.data(), &num_formats); + if (status != VA_STATUS_SUCCESS) + CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats failed"); + VAImageFormat *selected_format = nullptr; + for (auto &fmt : fmt_list){ + if (fmt.fourcc == VA_FOURCC_NV12 || fmt.fourcc == VA_FOURCC_YV12){ + selected_format = &fmt; + break; + } + } + if (selected_format == nullptr) + CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats did not return a supported format"); + + status = vaCreateImage(display, selected_format, size.width, size.height, &image); + if (status != VA_STATUS_SUCCESS) + CV_Error(cv::Error::StsError, "VA-API: vaCreateImage failed"); + + } unsigned char* buffer = 0; status = vaMapBuffer(display, image.buf, (void **)&buffer); @@ -627,6 +653,14 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, if (status != VA_STATUS_SUCCESS) CV_Error(cv::Error::StsError, "VA-API: vaUnmapBuffer failed"); + if (indirect_buffer){ + status = vaPutImage(display, surface, image.image_id, 0, 0, size.width, size.height, 0, 0, size.width, size.height); + 
if (status != VA_STATUS_SUCCESS){ + vaDestroyImage(display, image.image_id); + CV_Error(cv::Error::StsError, "VA-API: vaPutImage failed"); + } + } + status = vaDestroyImage(display, image.image_id); if (status != VA_STATUS_SUCCESS) CV_Error(cv::Error::StsError, "VA-API: vaDestroyImage failed"); @@ -711,8 +745,37 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out VAImage image; status = vaDeriveImage(display, surface, &image); - if (status != VA_STATUS_SUCCESS) - CV_Error(cv::Error::StsError, "VA-API: vaDeriveImage failed"); + if (status != VA_STATUS_SUCCESS){ + //try vaCreateImage + vaGetImage + //pick a format + int num_formats = vaMaxNumImageFormats(display); + if (num_formats <= 0) + CV_Error(cv::Error::StsError, "VA-API: vaMaxNumImageFormats failed"); + std::vector<VAImageFormat> fmt_list(num_formats); + + status = vaQueryImageFormats(display, fmt_list.data(), &num_formats); + if (status != VA_STATUS_SUCCESS) + CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats failed"); + VAImageFormat *selected_format = nullptr; + for (auto &fmt : fmt_list){ + if (fmt.fourcc == VA_FOURCC_NV12 || fmt.fourcc == VA_FOURCC_YV12){ + selected_format = &fmt; + break; + } + } + if (selected_format == nullptr) + CV_Error(cv::Error::StsError, "VA-API: vaQueryImageFormats did not return a supported format"); + + status = vaCreateImage(display, selected_format, size.width, size.height, &image); + if (status != VA_STATUS_SUCCESS) + CV_Error(cv::Error::StsError, "VA-API: vaCreateImage failed"); + + status = vaGetImage(display, surface, 0, 0, size.width, size.height, image.image_id); + if (status != VA_STATUS_SUCCESS){ + vaDestroyImage(display, image.image_id); + CV_Error(cv::Error::StsError, "VA-API: vaGetImage failed"); + } + } unsigned char* buffer = 0; status = vaMapBuffer(display, image.buf, (void **)&buffer); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index e6cb82919a..20e3a17755 100644 --- 
a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1819,8 +1819,8 @@ OCL_TEST_P(ReduceSum, Mat) { generateTestData(); - OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_SUM, dtype)); - OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_SUM, dtype)); + OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_SUM, dtype)); + OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_SUM, dtype)); double eps = ddepth <= CV_32S ? 1 : 7e-4; OCL_EXPECT_MATS_NEAR(dst, eps); @@ -1835,8 +1835,8 @@ OCL_TEST_P(ReduceMax, Mat) { generateTestData(); - OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_MAX, dtype)); - OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_MAX, dtype)); + OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_MAX, dtype)); + OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_MAX, dtype)); OCL_EXPECT_MATS_NEAR(dst, 0); } @@ -1850,8 +1850,8 @@ OCL_TEST_P(ReduceMin, Mat) { generateTestData(); - OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_MIN, dtype)); - OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_MIN, dtype)); + OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_MIN, dtype)); + OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_MIN, dtype)); OCL_EXPECT_MATS_NEAR(dst, 0); } @@ -1865,8 +1865,8 @@ OCL_TEST_P(ReduceAvg, Mat) { generateTestData(); - OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_AVG, dtype)); - OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_AVG, dtype)); + OCL_OFF(cv::reduce(src_roi, dst_roi, dim, REDUCE_AVG, dtype)); + OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, REDUCE_AVG, dtype)); double eps = ddepth <= CV_32S ? 
1 : 6e-6; OCL_EXPECT_MATS_NEAR(dst, eps); diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 3712be9f2e..4218cb9297 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1918,5 +1918,29 @@ TEST(Core_InputOutput, FileStorage_16F_json) test_20279(fs); } +TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_YAML) +{ + FileStorage fs("invalid_path/test.yaml", cv::FileStorage::WRITE); + EXPECT_FALSE(fs.isOpened()); + EXPECT_ANY_THROW(fs.write("K", 1)); + fs.release(); +} + +TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_XML) +{ + FileStorage fs("invalid_path/test.xml", cv::FileStorage::WRITE); + EXPECT_FALSE(fs.isOpened()); + EXPECT_ANY_THROW(fs.write("K", 1)); + fs.release(); +} + +TEST(Core_InputOutput, FileStorage_invalid_path_regression_21448_JSON) +{ + FileStorage fs("invalid_path/test.json", cv::FileStorage::WRITE); + EXPECT_FALSE(fs.isOpened()); + EXPECT_ANY_THROW(fs.write("K", 1)); + fs.release(); +} + }} // namespace diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index bd0f3897d2..0c54b55ac6 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -93,7 +93,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat { int srcType = src.type(); bool support = false; - if( opType == CV_REDUCE_SUM || opType == CV_REDUCE_AVG ) + if( opType == REDUCE_SUM || opType == REDUCE_AVG ) { if( srcType == CV_8U && (dstType == CV_32S || dstType == CV_32F || dstType == CV_64F) ) support = true; @@ -106,7 +106,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat if( srcType == CV_64F && dstType == CV_64F) support = true; } - else if( opType == CV_REDUCE_MAX ) + else if( opType == REDUCE_MAX ) { if( srcType == CV_8U && dstType == CV_8U ) support = true; @@ -115,7 +115,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat if( srcType == CV_64F && 
dstType == CV_64F ) support = true; } - else if( opType == CV_REDUCE_MIN ) + else if( opType == REDUCE_MIN ) { if( srcType == CV_8U && dstType == CV_8U) support = true; @@ -128,7 +128,7 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat return cvtest::TS::OK; double eps = 0.0; - if ( opType == CV_REDUCE_SUM || opType == CV_REDUCE_AVG ) + if ( opType == REDUCE_SUM || opType == REDUCE_AVG ) { if ( dstType == CV_32F ) eps = 1.e-5; @@ -152,10 +152,10 @@ int Core_ReduceTest::checkOp( const Mat& src, int dstType, int opType, const Mat if( check ) { char msg[100]; - const char* opTypeStr = opType == CV_REDUCE_SUM ? "CV_REDUCE_SUM" : - opType == CV_REDUCE_AVG ? "CV_REDUCE_AVG" : - opType == CV_REDUCE_MAX ? "CV_REDUCE_MAX" : - opType == CV_REDUCE_MIN ? "CV_REDUCE_MIN" : "unknown operation type"; + const char* opTypeStr = opType == REDUCE_SUM ? "REDUCE_SUM" : + opType == REDUCE_AVG ? "REDUCE_AVG" : + opType == REDUCE_MAX ? "REDUCE_MAX" : + opType == REDUCE_MIN ? "REDUCE_MIN" : "unknown operation type"; string srcTypeStr, dstTypeStr; getMatTypeStr( src.type(), srcTypeStr ); getMatTypeStr( dstType, dstTypeStr ); @@ -195,19 +195,19 @@ int Core_ReduceTest::checkCase( int srcType, int dstType, int dim, Size sz ) CV_Assert( 0 ); // 1. sum - tempCode = checkOp( src, dstType, CV_REDUCE_SUM, sum, dim ); + tempCode = checkOp( src, dstType, REDUCE_SUM, sum, dim ); code = tempCode != cvtest::TS::OK ? tempCode : code; // 2. avg - tempCode = checkOp( src, dstType, CV_REDUCE_AVG, avg, dim ); + tempCode = checkOp( src, dstType, REDUCE_AVG, avg, dim ); code = tempCode != cvtest::TS::OK ? tempCode : code; // 3. max - tempCode = checkOp( src, dstType, CV_REDUCE_MAX, max, dim ); + tempCode = checkOp( src, dstType, REDUCE_MAX, max, dim ); code = tempCode != cvtest::TS::OK ? tempCode : code; // 4. min - tempCode = checkOp( src, dstType, CV_REDUCE_MIN, min, dim ); + tempCode = checkOp( src, dstType, REDUCE_MIN, min, dim ); code = tempCode != cvtest::TS::OK ? 
tempCode : code; return code; @@ -315,7 +315,7 @@ TEST(Core_PCA, accuracy) Mat rBackPrjTestPoints = rPCA.backProject( rPrjTestPoints ); Mat avg(1, sz.width, CV_32FC1 ); - cv::reduce( rPoints, avg, 0, CV_REDUCE_AVG ); + cv::reduce( rPoints, avg, 0, REDUCE_AVG ); Mat Q = rPoints - repeat( avg, rPoints.rows, 1 ), Qt = Q.t(), eval, evec; Q = Qt * Q; Q = Q /(float)rPoints.rows; @@ -1559,10 +1559,10 @@ TEST(Reduce, regression_should_fail_bug_4594) cv::Mat src = cv::Mat::eye(4, 4, CV_8U); std::vector dst; - EXPECT_THROW(cv::reduce(src, dst, 0, CV_REDUCE_MIN, CV_32S), cv::Exception); - EXPECT_THROW(cv::reduce(src, dst, 0, CV_REDUCE_MAX, CV_32S), cv::Exception); - EXPECT_NO_THROW(cv::reduce(src, dst, 0, CV_REDUCE_SUM, CV_32S)); - EXPECT_NO_THROW(cv::reduce(src, dst, 0, CV_REDUCE_AVG, CV_32S)); + EXPECT_THROW(cv::reduce(src, dst, 0, REDUCE_MIN, CV_32S), cv::Exception); + EXPECT_THROW(cv::reduce(src, dst, 0, REDUCE_MAX, CV_32S), cv::Exception); + EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_SUM, CV_32S)); + EXPECT_NO_THROW(cv::reduce(src, dst, 0, REDUCE_AVG, CV_32S)); } TEST(Mat, push_back_vector) diff --git a/modules/core/test/test_math.cpp b/modules/core/test/test_math.cpp index 0b083b3e6d..c23bf5c7eb 100644 --- a/modules/core/test/test_math.cpp +++ b/modules/core/test/test_math.cpp @@ -3023,7 +3023,7 @@ TEST(CovariationMatrixVectorOfMatWithMean, accuracy) cv::randu(src,cv::Scalar(-128), cv::Scalar(128)); cv::Mat goldMean; - cv::reduce(src,goldMean,0 ,CV_REDUCE_AVG, CV_32F); + cv::reduce(src,goldMean,0 ,REDUCE_AVG, CV_32F); cv::calcCovarMatrix(src,gold,goldMean,singleMatFlags,CV_32F); diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index d9df475fa6..8ed0afe771 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -4,6 +4,15 @@ #include "test_precomp.hpp" #include +#include "opencv2/core/utils/logger.hpp" + +#include + +#ifdef CV_CXX11 +#include +#include +#endif + namespace opencv_test { namespace { 
TEST(Core_OutputArrayCreate, _1997) @@ -243,6 +252,62 @@ TEST(Core_Parallel, propagate_exceptions) }, cv::Exception); } +class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody +{ +public: + FPDenormalsHintCheckerParallelLoopBody() + : isOK(true) + { + state_values_to_check = cv::details::saveFPDenormalsState(base_state); + } + ~FPDenormalsHintCheckerParallelLoopBody() {} + void operator()(const cv::Range& r) const + { + CV_UNUSED(r); + cv::details::FPDenormalsModeState state; + if (cv::details::saveFPDenormalsState(state)) + { + for (int i = 0; i < state_values_to_check; ++i) + { + if (base_state.reserved[i] != state.reserved[i]) + { + CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x", i, base_state.reserved[i], state.reserved[i])); + isOK = false; + cv::details::restoreFPDenormalsState(base_state); + } + } + } + else + { + // FP state is not supported + // no checks + } +#ifdef CV_CXX11 + std::this_thread::sleep_for(std::chrono::milliseconds(100)); +#endif + } + + cv::details::FPDenormalsModeState base_state; + int state_values_to_check; + + mutable bool isOK; +}; + +TEST(Core_Parallel, propagate_fp_denormals_ignore_hint) +{ + int nThreads = std::max(1, cv::getNumThreads()) * 3; + for (int i = 0; i < 4; ++i) + { + SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", i, ((i & 1) != 0) ? 
"enable" : "disable")); + FPDenormalsIgnoreHintScope fp_denormals_scope((i & 1) != 0); + FPDenormalsHintCheckerParallelLoopBody job; + ASSERT_NO_THROW({ + parallel_for_(cv::Range(0, nThreads), job); + }); + EXPECT_TRUE(job.isOK); + } +} + TEST(Core_Version, consistency) { // this test verifies that OpenCV version loaded in runtime diff --git a/modules/core/test/test_precomp.hpp b/modules/core/test/test_precomp.hpp index a82f5cc12c..81ddf45de9 100644 --- a/modules/core/test/test_precomp.hpp +++ b/modules/core/test/test_precomp.hpp @@ -6,9 +6,6 @@ #include "opencv2/ts.hpp" #include "opencv2/ts/ocl_test.hpp" -#include "opencv2/core/core_c.h" - -#include "opencv2/core/cvdef.h" #include "opencv2/core/private.hpp" #include "opencv2/core/hal/hal.hpp" diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp index c323d17c06..a89972762a 100644 --- a/modules/core/test/test_umat.cpp +++ b/modules/core/test/test_umat.cpp @@ -1398,8 +1398,8 @@ TEST(UMat, testTempObjects_Mat_issue_8693) randu(srcUMat, -1.f, 1.f); srcUMat.copyTo(srcMat); - reduce(srcUMat, srcUMat, 0, CV_REDUCE_SUM); - reduce(srcMat, srcMat, 0, CV_REDUCE_SUM); + reduce(srcUMat, srcUMat, 0, REDUCE_SUM); + reduce(srcMat, srcMat, 0, REDUCE_SUM); srcUMat.convertTo(srcUMat, CV_64FC1); srcMat.convertTo(srcMat, CV_64FC1); diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index dfc08e8b9b..a9540f1088 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -165,24 +165,13 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() set(dnn_runtime_libs "") -if(INF_ENGINE_TARGET) - set(use_nn_builder OFF) - if(TARGET inference_engine_nn_builder OR # custom imported target - TARGET IE::inference_engine_nn_builder OR # default imported target via InferenceEngineConfig.cmake - INF_ENGINE_RELEASE VERSION_LESS "2020000000") # compatibility with older versions on IE - set(use_nn_builder ON) + +ocv_option(OPENCV_DNN_OPENVINO "Build with OpenVINO support (2021.4+)" (TARGET 
ocv.3rdparty.openvino)) +if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) + if(NOT HAVE_OPENVINO AND NOT HAVE_NGRAPH) + message(FATAL_ERROR "DNN: Inference Engine is not supported without enabled 'nGraph'. Check build configuration.") endif() - ocv_option(OPENCV_DNN_IE_NN_BUILDER_2019 "Build with Inference Engine NN Builder API support" ${use_nn_builder}) # future: NOT HAVE_NGRAPH - if(OPENCV_DNN_IE_NN_BUILDER_2019) - message(STATUS "DNN: Enabling Inference Engine NN Builder API support") - add_definitions(-DHAVE_DNN_IE_NN_BUILDER_2019=1) - endif() - list(APPEND dnn_runtime_libs ${INF_ENGINE_TARGET}) -endif() -if(HAVE_NGRAPH) - message(STATUS "DNN: Enabling Inference Engine nGraph API support") - add_definitions(-DHAVE_DNN_NGRAPH) - list(APPEND dnn_runtime_libs ngraph::ngraph) + list(APPEND dnn_runtime_libs ocv.3rdparty.openvino) endif() ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs} ${webnn_srcs}) @@ -193,7 +182,7 @@ ocv_add_accuracy_tests(${dnn_runtime_libs}) set(perf_path "${CMAKE_CURRENT_LIST_DIR}/perf") file(GLOB_RECURSE perf_srcs "${perf_path}/*.cpp") file(GLOB_RECURSE perf_hdrs "${perf_path}/*.hpp" "${perf_path}/*.h") -ocv_add_perf_tests(${INF_ENGINE_TARGET} +ocv_add_perf_tests(${dnn_runtime_libs} FILES test_common "${CMAKE_CURRENT_LIST_DIR}/test/test_common.hpp" "${CMAKE_CURRENT_LIST_DIR}/test/test_common.impl.hpp" FILES Src ${perf_srcs} FILES Include ${perf_hdrs} diff --git a/modules/dnn/include/opencv2/dnn/dict.hpp b/modules/dnn/include/opencv2/dnn/dict.hpp index 463d314bee..059ce9b28e 100644 --- a/modules/dnn/include/opencv2/dnn/dict.hpp +++ b/modules/dnn/include/opencv2/dnn/dict.hpp @@ -60,13 +60,13 @@ CV__DNN_INLINE_NS_BEGIN struct CV_EXPORTS_W DictValue { DictValue(const DictValue &r); - DictValue(bool i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i ? 
1 : 0; } //!< Constructs integer scalar - DictValue(int64 i = 0) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar - CV_WRAP DictValue(int i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar - DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = p; } //!< Constructs integer scalar - CV_WRAP DictValue(double p) : type(Param::REAL), pd(new AutoBuffer) { (*pd)[0] = p; } //!< Constructs floating point scalar - CV_WRAP DictValue(const String &s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< Constructs string scalar - DictValue(const char *s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< @overload + explicit DictValue(bool i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i ? 1 : 0; } //!< Constructs integer scalar + explicit DictValue(int64 i = 0) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar + CV_WRAP explicit DictValue(int i) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = i; } //!< Constructs integer scalar + explicit DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer) { (*pi)[0] = p; } //!< Constructs integer scalar + CV_WRAP explicit DictValue(double p) : type(Param::REAL), pd(new AutoBuffer) { (*pd)[0] = p; } //!< Constructs floating point scalar + CV_WRAP explicit DictValue(const String &s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< Constructs string scalar + explicit DictValue(const char *s) : type(Param::STRING), ps(new AutoBuffer) { (*ps)[0] = s; } //!< @overload template static DictValue arrayInt(TypeIter begin, int size); //!< Constructs integer array diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index d6b29cfcf3..97033a313e 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -134,7 +134,7 @@ CV__DNN_INLINE_NS_BEGIN class BackendNode { public: - 
BackendNode(int backendId); + explicit BackendNode(int backendId); virtual ~BackendNode(); //!< Virtual destructor to make polymorphism. @@ -277,18 +277,18 @@ CV__DNN_INLINE_NS_BEGIN * Each layer input and output can be labeled to easily identify them using "%[.output_name]" notation. * This method maps label of input blob to its index into input vector. */ - virtual int inputNameToIndex(String inputName); + virtual int inputNameToIndex(String inputName); // FIXIT const /** @brief Returns index of output blob in output array. * @see inputNameToIndex() */ - CV_WRAP virtual int outputNameToIndex(const String& outputName); + CV_WRAP virtual int outputNameToIndex(const String& outputName); // FIXIT const /** * @brief Ask layer if it support specific backend for doing computations. * @param[in] backendId computation backend identifier. * @see Backend */ - virtual bool supportBackend(int backendId); + virtual bool supportBackend(int backendId); // FIXIT const /** * @brief Returns Halide backend node. @@ -302,8 +302,6 @@ CV__DNN_INLINE_NS_BEGIN */ virtual Ptr initHalide(const std::vector > &inputs); - virtual Ptr initInfEngine(const std::vector > &inputs); - virtual Ptr initNgraph(const std::vector > &inputs, const std::vector >& nodes); virtual Ptr initVkCom(const std::vector > &inputs); @@ -495,18 +493,29 @@ CV__DNN_INLINE_NS_BEGIN /** @brief Converts string name of the layer to the integer identifier. * @returns id of the layer, or -1 if the layer wasn't found. */ - CV_WRAP int getLayerId(const String &layer); + CV_WRAP int getLayerId(const String &layer) const; CV_WRAP std::vector getLayerNames() const; - /** @brief Container for strings and integers. */ + /** @brief Container for strings and integers. + * + * @deprecated Use getLayerId() with int result. + */ typedef DictValue LayerId; /** @brief Returns pointer to layer with specified id or name which the network use. 
*/ - CV_WRAP Ptr getLayer(LayerId layerId); + CV_WRAP Ptr getLayer(int layerId) const; + /** @overload + * @deprecated Use int getLayerId(const String &layer) + */ + CV_WRAP inline Ptr getLayer(const String& layerName) const { return getLayer(getLayerId(layerName)); } + /** @overload + * @deprecated to be removed + */ + CV_WRAP Ptr getLayer(const LayerId& layerId) const; /** @brief Returns pointers to input layers of specific layer. */ - std::vector > getLayerInputs(LayerId layerId); // FIXIT: CV_WRAP + std::vector > getLayerInputs(int layerId) const; // FIXIT: CV_WRAP /** @brief Connects output of the first layer to input of the second layer. * @param outPin descriptor of the first layer output. @@ -531,6 +540,18 @@ CV__DNN_INLINE_NS_BEGIN */ void connect(int outLayerId, int outNum, int inpLayerId, int inpNum); + /** @brief Registers network output with name + * + * Function may create additional 'Identity' layer. + * + * @param outputName identifier of the output + * @param layerId identifier of the second layer + * @param outputPort number of the second layer input + * + * @returns index of bound layer (the same as layerId or newly created) + */ + int registerOutput(const std::string& outputName, int layerId, int outputPort); + /** @brief Sets outputs names of the network input pseudo layer. * * Each net always has special own the network input pseudo layer with id=0. @@ -662,20 +683,26 @@ CV__DNN_INLINE_NS_BEGIN * @note If shape of the new blob differs from the previous shape, * then the following forward pass may fail. */ - CV_WRAP void setParam(LayerId layer, int numParam, const Mat &blob); + CV_WRAP void setParam(int layer, int numParam, const Mat &blob); + CV_WRAP inline void setParam(const String& layerName, int numParam, const Mat &blob) { return setParam(getLayerId(layerName), numParam, blob); } /** @brief Returns parameter blob of the layer. * @param layer name or id of the layer. * @param numParam index of the layer parameter in the Layer::blobs array. 
* @see Layer::blobs */ - CV_WRAP Mat getParam(LayerId layer, int numParam = 0); + CV_WRAP Mat getParam(int layer, int numParam = 0) const; + CV_WRAP inline Mat getParam(const String& layerName, int numParam = 0) const { return getParam(getLayerId(layerName), numParam); } /** @brief Returns indexes of layers with unconnected outputs. + * + * FIXIT: Rework API to registerOutput() approach, deprecate this call */ CV_WRAP std::vector getUnconnectedOutLayers() const; /** @brief Returns names of layers with unconnected outputs. + * + * FIXIT: Rework API to registerOutput() approach, deprecate this call */ CV_WRAP std::vector getUnconnectedOutLayersNames() const; diff --git a/modules/dnn/include/opencv2/dnn/layer.hpp b/modules/dnn/include/opencv2/dnn/layer.hpp index 8500599371..a4d167564d 100644 --- a/modules/dnn/include/opencv2/dnn/layer.hpp +++ b/modules/dnn/include/opencv2/dnn/layer.hpp @@ -66,6 +66,9 @@ public: //! Unregisters registered layer with specified type name. Thread-safe. static void unregisterLayer(const String &type); + //! Check if layer is registered. + static bool isLayerRegistered(const std::string& type); + /** @brief Creates instance of registered layer. * @param type type name of creating layer. * @param params parameters which will be used for layer initialization. 
diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp index 4c610f6cef..9bbbc806a8 100644 --- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp +++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp @@ -184,7 +184,8 @@ static inline MatShape concat(const MatShape& a, const MatShape& b) return c; } -static inline std::string toString(const MatShape& shape, const String& name = "") +template +static inline std::string toString(const std::vector<_Tp>& shape, const String& name = "") { std::ostringstream ss; if (!name.empty()) @@ -195,11 +196,14 @@ static inline std::string toString(const MatShape& shape, const String& name = " ss << " ]"; return ss.str(); } -static inline void print(const MatShape& shape, const String& name = "") + +template +static inline void print(const std::vector<_Tp>& shape, const String& name = "") { std::cout << toString(shape, name) << std::endl; } -static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape) +template +static inline std::ostream& operator<<(std::ostream &out, const std::vector<_Tp>& shape) { out << toString(shape); return out; diff --git a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp index 333b1bfdd2..b81806ed5a 100644 --- a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp +++ b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp @@ -15,14 +15,18 @@ CV__DNN_INLINE_NS_BEGIN /* Values for 'OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE' parameter */ +/// @deprecated #define CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API "NN_BUILDER" +/// @deprecated #define CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH "NGRAPH" /** @brief Returns Inference Engine internal backend API. * * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros. * - * Default value is controlled through `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable). 
+ * `OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE` runtime parameter (environment variable) is ignored since 4.6.0. + * + * @deprecated */ CV_EXPORTS_W cv::String getInferenceEngineBackendType(); @@ -31,6 +35,8 @@ CV_EXPORTS_W cv::String getInferenceEngineBackendType(); * See values of `CV_DNN_BACKEND_INFERENCE_ENGINE_*` macros. * * @returns previous value of internal backend API + * + * @deprecated */ CV_EXPORTS_W cv::String setInferenceEngineBackendType(const cv::String& newBackendType); diff --git a/modules/dnn/misc/objc/gen_dict.json b/modules/dnn/misc/objc/gen_dict.json index e6d561fba0..6072bdfc01 100644 --- a/modules/dnn/misc/objc/gen_dict.json +++ b/modules/dnn/misc/objc/gen_dict.json @@ -18,8 +18,12 @@ "(long)getFLOPS:(NSArray*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithNetInputShapes"} }, "(long)getFLOPS:(int)layerId netInputShape:(IntVector*)netInputShape" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} }, "(long)getFLOPS:(int)layerId netInputShapes:(NSArray*)netInputShapes" : { "getFLOPS" : {"name" : "getFLOPSWithLayerId"} }, + "(Layer*)getLayer:(NSString*)layerName" : { "getLayer" : {"name" : "getLayerByName"} }, + "(Layer*)getLayer:(DictValue*)layerId" : { "getLayer" : {"name" : "getLayerByDictValue"} }, "(void)getLayersShapes:(IntVector*)netInputShape layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray*>*)inLayersShapes outLayersShapes:(NSMutableArray*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShape"} }, - "(void)getLayersShapes:(NSArray*)netInputShapes layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray*>*)inLayersShapes outLayersShapes:(NSMutableArray*>*)outLayersShapes" : { "getLayersShapes" : {"name" : "getLayersShapesWithNetInputShapes"} } + "(void)getLayersShapes:(NSArray*)netInputShapes layersIds:(IntVector*)layersIds inLayersShapes:(NSMutableArray*>*)inLayersShapes outLayersShapes:(NSMutableArray*>*)outLayersShapes" : { "getLayersShapes" : {"name" : 
"getLayersShapesWithNetInputShapes"} }, + "(Mat*)getParam:(NSString*)layerName numParam:(int)numParam" : { "getParam" : {"name" : "getParamByName"} }, + "(void)setParam:(NSString*)layerName numParam:(int)numParam blob:(Mat*)blob" : { "setParam" : {"name" : "setParamByName"} } } }, "type_dict": { diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp index 7fb64c7c0d..a8d2f28ca6 100644 --- a/modules/dnn/src/caffe/caffe_importer.cpp +++ b/modules/dnn/src/caffe/caffe_importer.cpp @@ -53,6 +53,8 @@ #include "caffe_io.hpp" #endif +#include + namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN @@ -88,6 +90,8 @@ MatShape parseBlobShape(const caffe::BlobShape& _input_shape) class CaffeImporter { + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + caffe::NetParameter net; caffe::NetParameter netBinary; diff --git a/modules/dnn/src/darknet/darknet_importer.cpp b/modules/dnn/src/darknet/darknet_importer.cpp index f1269bd979..b5767af405 100644 --- a/modules/dnn/src/darknet/darknet_importer.cpp +++ b/modules/dnn/src/darknet/darknet_importer.cpp @@ -51,6 +51,7 @@ #include "darknet_io.hpp" +#include namespace cv { namespace dnn { @@ -61,6 +62,8 @@ namespace class DarknetImporter { + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + darknet::NetParameter net; public: diff --git a/modules/dnn/src/debug_utils.cpp b/modules/dnn/src/debug_utils.cpp index d951205bd8..0e1ba10236 100644 --- a/modules/dnn/src/debug_utils.cpp +++ b/modules/dnn/src/debug_utils.cpp @@ -37,11 +37,8 @@ void skipModelImport(bool skip) void detail::LayerHandler::addMissing(const std::string& name, const std::string& type) { - cv::AutoLock lock(getLayerFactoryMutex()); - auto& registeredLayers = getLayerFactoryImpl(); - // If we didn't add it, but can create it, it's custom and not missing. 
- if (layers.find(type) == layers.end() && registeredLayers.find(type) != registeredLayers.end()) + if (!contains(type) && LayerFactory::isLayerRegistered(type)) { return; } @@ -51,17 +48,17 @@ void detail::LayerHandler::addMissing(const std::string& name, const std::string bool detail::LayerHandler::contains(const std::string& type) const { - return layers.find(type) != layers.end(); + return layers.count(type) != 0; } -void detail::LayerHandler::printMissing() +void detail::LayerHandler::printMissing() const { if (layers.empty()) { return; } - std::stringstream ss; + std::ostringstream ss; ss << "DNN: Not supported types:\n"; for (const auto& type_names : layers) { diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 67312dba78..954ada50f2 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -66,6 +66,8 @@ #include #include +#include + #include #include @@ -175,48 +177,29 @@ private: #ifdef HAVE_INF_ENGINE if (checkIETarget(DNN_TARGET_CPU)) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_CPU)); -#endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU)); #endif } if (checkIETarget(DNN_TARGET_MYRIAD)) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_MYRIAD)); -#endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); #endif } if (checkIETarget(DNN_TARGET_HDDL)) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_HDDL)); -#endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); #endif } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (checkIETarget(DNN_TARGET_FPGA)) - 
backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_FPGA)); -#endif #ifdef HAVE_OPENCL if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) { if (checkIETarget(DNN_TARGET_OPENCL)) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL)); -#endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL)); #endif } if (checkIETarget(DNN_TARGET_OPENCL_FP16)) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, DNN_TARGET_OPENCL_FP16)); -#endif #ifdef HAVE_DNN_NGRAPH backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16)); #endif @@ -271,7 +254,7 @@ std::vector getAvailableTargets(Backend be) be = (Backend)PARAM_DNN_BACKEND_DEFAULT; #ifdef HAVE_INF_ENGINE if (be == DNN_BACKEND_INFERENCE_ENGINE) - be = getInferenceEngineBackendTypeParam(); + be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; #endif std::vector result; @@ -635,8 +618,7 @@ struct DataLayer : public Layer virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && inputsData.size() == 1); + return backendId == DNN_BACKEND_OPENCV; } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE @@ -827,39 +809,6 @@ struct DataLayer : public Layer } } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - CV_CheckEQ(inputsData.size(), (size_t)1, ""); - CV_CheckEQ(inputsData[0].dims, 4, ""); - const size_t numChannels = inputsData[0].size[1]; - CV_Assert(numChannels <= 4); - - // Scale - InferenceEngine::TensorDesc td(InferenceEngine::Precision::FP32, {numChannels}, - InferenceEngine::Layout::C); - auto weights = 
InferenceEngine::make_shared_blob(td); - weights->allocate(); - - float* weight_buf = weights->buffer().as(); - std::fill(weight_buf, weight_buf + numChannels, scaleFactors[0]); - - // Mean subtraction - auto biases = InferenceEngine::make_shared_blob(td); - biases->allocate(); - float* bias_buf = biases->buffer().as(); - - for (int i = 0; i < numChannels; ++i) - { - bias_buf[i] = -means[0][i] * scaleFactors[0]; - } - - InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name); - addConstantData("weights", weights, ieLayer); - addConstantData("biases", biases, ieLayer); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 std::vector outNames; std::vector shapes; @@ -895,11 +844,11 @@ public: // layer blob. int numReferences(const LayerPin& lp) { - std::map::iterator mapIt = reuseMap.find(lp); + std::map::const_iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); LayerPin memHost = mapIt->second; - std::map::iterator refIt = refCounter.find(memHost); + std::map::const_iterator refIt = refCounter.find(memHost); CV_Assert(refIt != refCounter.end()); return refIt->second; } @@ -927,7 +876,7 @@ public: // Decrease references counter to allocated memory inside specific blob. void releaseReference(const LayerPin& lp) { - std::map::iterator mapIt = reuseMap.find(lp); + std::map::const_iterator mapIt = reuseMap.find(lp); CV_Assert(mapIt != reuseMap.end()); std::map::iterator refIt = refCounter.find(mapIt->second); @@ -951,8 +900,8 @@ public: Mat bestBlob; LayerPin bestBlobPin; - std::map::iterator hostIt; - std::map::iterator refIt; + std::map::const_iterator hostIt; + std::map::const_iterator refIt; const int targetTotal = total(shape); int bestBlobTotal = INT_MAX; @@ -964,7 +913,7 @@ public: // it might be used as output. 
if (refIt != refCounter.end() && refIt->second == 0) { - Mat& unusedBlob = hostIt->second; + const Mat& unusedBlob = hostIt->second; if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal && unusedBlob.type() == dtype) @@ -1117,18 +1066,14 @@ static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) } else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - return Ptr(new InfEngineBackendWrapper(targetId, m)); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; } else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { #ifdef HAVE_DNN_NGRAPH return Ptr(new NgraphBackendWrapper(targetId, m)); #else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph"); #endif } else if (backendId == DNN_BACKEND_WEBNN) @@ -1177,7 +1122,7 @@ detail::NetImplBase::NetImplBase() // nothing } -std::string detail::NetImplBase::getDumpFileNameBase() +std::string detail::NetImplBase::getDumpFileNameBase() const { std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); return dumpFileNameBase; @@ -1214,6 +1159,7 @@ struct Net::Impl : public detail::NetImplBase std::vector blobsToKeep; MapIdToLayerData layers; std::map layerNameToId; + std::map outputNameToId; // use registerOutput() to populate outputs BlobManager blobManager; int preferableBackend; int preferableTarget; @@ -1230,7 +1176,6 @@ struct Net::Impl : public detail::NetImplBase bool fusion; bool isAsync; std::vector layersTimings; - Mat output_blob; #ifdef HAVE_CUDA struct CudaInfo_t @@ -1276,7 +1221,7 @@ struct Net::Impl : public detail::NetImplBase } else if (preferableBackend == 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { - return wrapMat(preferableBackend, preferableTarget, host); + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { @@ -1329,7 +1274,7 @@ struct Net::Impl : public detail::NetImplBase std::vector< std::reference_wrapper > compileList; compileList.reserve(64); for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { - LayerData &ld = it->second; + LayerData& ld = it->second; Ptr layer = ld.layerInstance; if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) { @@ -1405,7 +1350,7 @@ struct Net::Impl : public detail::NetImplBase preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT; #ifdef HAVE_INF_ENGINE if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) - preferableBackend = getInferenceEngineBackendTypeParam(); + preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam(); #endif CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || @@ -1416,8 +1361,7 @@ struct Net::Impl : public detail::NetImplBase preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL); #ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { CV_Assert( (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || @@ -1522,19 +1466,19 @@ struct Net::Impl : public detail::NetImplBase } } - int getLayerId(const String &layerName) + int getLayerId(const String &layerName) const { - std::map::iterator it = layerNameToId.find(layerName); + std::map::const_iterator it = layerNameToId.find(layerName); return (it != layerNameToId.end()) ? 
it->second : -1; } - int getLayerId(int id) + int getLayerId(int id) const { - MapIdToLayerData::iterator it = layers.find(id); + MapIdToLayerData::const_iterator it = layers.find(id); return (it != layers.end()) ? id : -1; } - int getLayerId(DictValue &layerDesc) + int getLayerId(DictValue &layerDesc) const { if (layerDesc.isInt()) return getLayerId(layerDesc.get()); @@ -1545,23 +1489,23 @@ struct Net::Impl : public detail::NetImplBase return -1; } - String getLayerName(int id) + String getLayerName(int id) const { - MapIdToLayerData::iterator it = layers.find(id); + MapIdToLayerData::const_iterator it = layers.find(id); return (it != layers.end()) ? it->second.name : "(unknown layer)"; } - LayerData& getLayerData(int id) + LayerData& getLayerData(int id) const { - MapIdToLayerData::iterator it = layers.find(id); + MapIdToLayerData::const_iterator it = layers.find(id); if (it == layers.end()) CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); - return it->second; + return const_cast(it->second); } - LayerData& getLayerData(const String &layerName) + LayerData& getLayerData(const String &layerName) const { int id = getLayerId(layerName); @@ -1571,7 +1515,7 @@ struct Net::Impl : public detail::NetImplBase return getLayerData(id); } - LayerData& getLayerData(const DictValue &layerDesc) + LayerData& getLayerData(const DictValue &layerDesc) const { CV_Assert(layerDesc.isInt() || layerDesc.isString()); if (layerDesc.isInt()) @@ -1597,14 +1541,14 @@ struct Net::Impl : public detail::NetImplBase ld.inputBlobsId[inNum] = from; } - int resolvePinOutputName(LayerData &ld, const String &outName) + int resolvePinOutputName(LayerData &ld, const String &outName) const { if (outName.empty()) return 0; return ld.getLayerInstance()->outputNameToIndex(outName); } - LayerPin getPinByAlias(const String &layerName) + LayerPin getPinByAlias(const String &layerName) const { LayerPin pin; pin.lid = (layerName.empty()) ? 
0 : getLayerId(layerName); @@ -1615,13 +1559,17 @@ struct Net::Impl : public detail::NetImplBase return pin; } - std::vector getLayerOutPins(const String &layerName) + std::vector getLayerOutPins(const String &layerName) const { int lid = (layerName.empty()) ? 0 : getLayerId(layerName); - std::vector pins; + MapIdToLayerData::const_iterator it = layers.find(lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid)); + const size_t nOutputs = it->second.outputBlobs.size(); - for (int i = 0; i < layers[lid].outputBlobs.size(); i++) + std::vector pins; + for (int i = 0; i < nOutputs; i++) { pins.push_back(LayerPin(lid, i)); } @@ -1629,6 +1577,38 @@ struct Net::Impl : public detail::NetImplBase return pins; } + // FIXIT remove dtype + int addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) + { + int id = getLayerId(name); + if (id >= 0) + { + if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") + { + CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); + return -1; + } + else + { + LayerData& ld = layers.find(id)->second; + ld.type = type; + ld.params = params; + return -1; + } + } + + id = ++lastLayerId; + layerNameToId.insert(std::make_pair(name, id)); + layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); + if (params.get("has_dynamic_shapes", false)) + hasDynamicShapes = true; + + if (dtype == CV_8S) + netWasQuantized = true; + + return id; + } + void connect(int outLayerId, int outNum, int inLayerId, int inNum) { CV_Assert(outLayerId < inLayerId); @@ -1638,6 +1618,40 @@ struct Net::Impl : public detail::NetImplBase addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); ldOut.requiredOutputs.insert(outNum); ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); + + CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")"); + } + + int registerOutput(const std::string& outputName, int 
layerId, int outputPort) + { + int checkLayerId = getLayerId(outputName); + if (checkLayerId >= 0) + { + if (checkLayerId == layerId) + { + if (outputPort == 0) + { + // layer name correlates with its output name + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked"); + outputNameToId.insert(std::make_pair(outputName, layerId)); + return checkLayerId; + } + } + CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort)); + } +#if 0 // TODO + if (outputPort == 0) + // make alias only, need to adopt getUnconnectedOutLayers() call +#endif + LayerParams outputLayerParams; + outputLayerParams.name = outputName; + outputLayerParams.type = "Identity"; + int dtype = CV_32F; // FIXIT remove + int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams); + connect(layerId, outputPort, outputLayerId, 0); + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort); + outputNameToId.insert(std::make_pair(outputName, outputLayerId)); + return outputLayerId; } void initBackend(const std::vector& blobsToKeep_) @@ -1649,14 +1663,6 @@ struct Net::Impl : public detail::NetImplBase } else if (preferableBackend == DNN_BACKEND_HALIDE) initHalideBackend(); - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - initInfEngineBackend(blobsToKeep_); -#else - CV_Assert(false && "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif - } else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { #ifdef HAVE_DNN_NGRAPH @@ -1678,7 +1684,7 @@ struct Net::Impl : public detail::NetImplBase else if (preferableBackend == DNN_BACKEND_CUDA) initCUDABackend(blobsToKeep_); else - 
CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend)); } void initHalideBackend() @@ -1736,322 +1742,17 @@ struct Net::Impl : public detail::NetImplBase } } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - // Before launching Inference Engine graph we need to specify output blobs. - // This function requests output blobs based on inputs references of - // layers from default backend or layers from different graphs. - void addInfEngineNetOutputs(LayerData &ld) - { - CV_TRACE_FUNCTION(); - Ptr layerNet; - if (ld.backendNodes.find(preferableBackend) != ld.backendNodes.end()) - { - Ptr node = ld.backendNodes[preferableBackend]; - if (!node.empty()) - { - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty()); - layerNet = ieNode->net; - } - } - // For an every input reference we check that it belongs to one of - // the Inference Engine backend graphs. Request an output blob if it is. - // Do nothing if layer's input is from the same graph. - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (layerNet != ieInpNode->net) - { - // layerNet is empty or nodes are from different graphs. 
- ieInpNode->net->addOutput(ieInpNode->layer.getName()); - } - } - } - } - - void initInfEngineBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, haveInfEngine()); - MapIdToLayerData::iterator it; - Ptr net; - - for (it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - if (ld.id == 0) - { - CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || - (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000) - dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; -#else - dataPtr->setName(netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]); -#endif - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000) - dataPtr->name = ld.name; -#else - dataPtr->setName(ld.name); -#endif - } - } - } - - if (skipInfEngineInit) - { - Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; - CV_Assert(!node.empty()); - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net->reset(); - - for (it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - if (ld.id == 0) - { - for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000) - dataPtr->name = netInputLayer->outNames[i]; -#else - dataPtr->setName(netInputLayer->outNames[i]); -#endif - } - } - else - { - for (int i = 0; i 
< ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000) - dataPtr->name = ld.name; -#else - dataPtr->setName(ld.name); -#endif - } - } - ieNode->net->addBlobs(ld.inputBlobsWrappers); - ieNode->net->addBlobs(ld.outputBlobsWrappers); - ld.skip = true; - } - layers[lastLayerId].skip = false; - ieNode->net->init((Target)preferableTarget); - return; - } - - // Build Inference Engine networks from sets of layers that support this - // backend. Split a whole model on several Inference Engine networks if - // some of layers are not implemented. - - bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU || - BackendRegistry::checkIETarget(DNN_TARGET_CPU); - - // Set of all input and output blobs wrappers for current network. - std::map > netBlobsWrappers; - for (it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - if (ld.id == 0 && ld.skip) - continue; - bool fused = ld.skip; - - Ptr layer = ld.layerInstance; - if (!fused && !layer->supportBackend(preferableBackend)) - { - bool customizable = ld.id != 0 && - INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) && - supportsCPUFallback; - // TODO: there is a bug in Myriad plugin with custom layers shape infer. 
- if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) - { - for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) - { - customizable = ld.inputBlobs[i]->size[0] == 1; - } - } - - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - - if (preferableTarget == DNN_TARGET_OPENCL) - customizable &= ld.type != "Eltwise"; - - if (!customizable) - { - addInfEngineNetOutputs(ld); - net = Ptr(); - netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef. - layer->preferableTarget = DNN_TARGET_CPU; - continue; - } - } - ld.skip = true; // Initially skip all Inference Engine supported layers. - - // Create a new network if one of inputs from different Inference Engine graph. - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (ieInpNode->net != net) - { - net = Ptr(); - netBlobsWrappers.clear(); // Is not used for R5 release but we don't wrap it to #ifdef. 
- break; - } - } - } - - Ptr node; - if (!net.empty()) - { - if (fused) - { - bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && - ld.inputBlobs[0]->data == ld.outputBlobs[0].data; - CV_Assert(inPlace); - node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; - ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; - } - } - else - net = Ptr(new InfEngineBackendNet()); - - if (!fused) - { - if (layer->supportBackend(preferableBackend)) - node = layer->initInfEngine(ld.inputBlobsWrappers); - else - { - node = Ptr(new InfEngineBackendNode( - ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); - } - } - else if (node.empty()) - continue; - - CV_Assert(!node.empty()); - ld.backendNodes[preferableBackend] = node; - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net = net; - - for (const auto& pin : blobsToKeep_) - { - if (pin.lid == ld.id) - { - ieNode->net->addOutput(ieNode->layer.getName()); - break; - } - } - - // Convert weights in FP16 for specific targets. - if ((preferableTarget == DNN_TARGET_OPENCL_FP16 || - preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_FPGA) && !fused) - { -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - for (const std::string& name : {"weights", "biases"}) - { - auto it = ieNode->layer.getParameters().find(name); - if (it != ieNode->layer.getParameters().end()) - { - InferenceEngine::Blob::Ptr bp = it->second.as(); - it->second = convertFp16(std::const_pointer_cast(bp)); - } - } -#else - auto& blobs = ieNode->layer.getConstantData(); - if (blobs.empty()) - { - // In case of non weightable layer we have to specify - // it's precision adding dummy blob. 
- auto blob = InferenceEngine::make_shared_blob( - InferenceEngine::Precision::FP16, - InferenceEngine::Layout::C, {1}); - blob->allocate(); - blobs[""] = blob; - } - else - { - for (auto& it : blobs) - it.second = convertFp16(std::const_pointer_cast(it.second)); - } -#endif - } - - if (!fused) - net->addLayer(ieNode->layer); - - net->connect(ld.inputBlobsWrappers, ld.outputBlobsWrappers, ieNode->layer.getName()); - net->addBlobs(ld.inputBlobsWrappers); - net->addBlobs(ld.outputBlobsWrappers); - addInfEngineNetOutputs(ld); - } - - // Initialize all networks. - for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) - { - LayerData &ld = it->second; - if (ld.backendNodes.find(preferableBackend) == ld.backendNodes.end()) - continue; - - Ptr node = ld.backendNodes[preferableBackend]; - if (node.empty()) - continue; - - Ptr ieNode = node.dynamicCast(); - if (ieNode.empty()) - continue; - - CV_Assert(!ieNode->net.empty()); - - if (!ieNode->net->isInitialized()) - { - ieNode->net->init((Target)preferableTarget); - ld.skip = false; - } - } - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH + /** mark input pins as outputs from other subnetworks + * FIXIT must be done by DNN engine not ngraph. 
+ */ void addNgraphOutputs(LayerData &ld) { CV_TRACE_FUNCTION(); + CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type); + Ptr layerNet; auto it = ld.backendNodes.find(preferableBackend); if (it != ld.backendNodes.end()) @@ -2075,8 +1776,8 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { - ieInpNode->net->addOutput(ieInpNode->node->get_friendly_name()); - ieInpNode->net->setUnconnectedNodes(ieInpNode); + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + ieInpNode->net->addOutput(ieInpNode); } } } @@ -2085,14 +1786,13 @@ struct Net::Impl : public detail::NetImplBase void initNgraphBackend(const std::vector& blobsToKeep_) { CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, haveInfEngine()); + CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, ""); - MapIdToLayerData::iterator it; Ptr net; - for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) { - LayerData &ld = it->second; + const LayerData& ld = it->second; if (ld.id == 0) { CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || @@ -2128,9 +1828,9 @@ struct Net::Impl : public detail::NetImplBase InfEngineNgraphNet& ienet = *ieNode->net; ienet.reset(); - for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { - LayerData &ld = it->second; + LayerData& ld = it->second; if (ld.id == 0) { for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) @@ -2172,17 +1872,23 @@ struct Net::Impl : public detail::NetImplBase // Build Inference Engine networks from sets of layers that support this // backend. Split a whole model on several Inference Engine networks if // some of layers are not implemented. 
- for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { - LayerData &ld = it->second; + LayerData& ld = it->second; + + CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ..."); if (ld.id == 0 && ld.skip) + { + CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!"); continue; + } bool fused = ld.skip; Ptr layer = ld.layerInstance; if (!fused && !layer->supportBackend(preferableBackend)) { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); bool customizable = ld.id != 0 && supportsCPUFallback; // TODO: there is a bug in Myriad plugin with custom layers shape infer. @@ -2210,6 +1916,7 @@ struct Net::Impl : public detail::NetImplBase if (!customizable) { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!"); addNgraphOutputs(ld); net = Ptr(); layer->preferableTarget = DNN_TARGET_CPU; @@ -2221,7 +1928,7 @@ struct Net::Impl : public detail::NetImplBase if (!inpNode.empty()) { Ptr ieNode = inpNode.dynamicCast(); CV_Assert(!ieNode.empty()); - ieNode->net->setUnconnectedNodes(ieNode); + ieNode->net->addOutput(ieNode); } } continue; @@ -2309,9 +2016,22 @@ struct Net::Impl : public detail::NetImplBase continue; auto ieInpNode = inputNodes[i].dynamicCast(); - CV_Assert(oid < ieInpNode->node->get_output_size()); + const auto& ngraph_input_node = ieInpNode->node; + CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); + + // Handle parameters from other subnets. 
Output port is not used in this case + if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && + ngraph_input_node->get_output_size() == 1) + { + inputNodes[i] = Ptr(new InfEngineNgraphNode(ngraph_input_node)); + continue; + } + CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node)); + // FIXIT refactor ".initNgraph()" API to use Output + // WA: use Concat to emulate Identity operation with requested output port + auto oid_node = std::make_shared(ngraph::OutputVector {ngraph_input_node->output(oid)}, 0); + inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); #else @@ -2321,21 +2041,30 @@ struct Net::Impl : public detail::NetImplBase if (layer->supportBackend(preferableBackend)) { + CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size()); node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes); +#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node) for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) { InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); node.dynamicCast()->setName(dataPtr->getName()); } +#else + node.dynamicCast()->setName(layer->name); +#endif } else { + CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type); node = Ptr(new InfEngineNgraphNode(inputNodes, ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); } } else if (node.empty()) + { + CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass..."); continue; + } ld.backendNodes[preferableBackend] = node; @@ -2343,15 +2072,11 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(!ieNode.empty()); 
ieNode->net = net; - if (ld.consumers.empty()) { - // TF EAST_text_detection - ieNode->net->setUnconnectedNodes(ieNode); - } for (const auto& pin : blobsToKeep_) { if (pin.lid == ld.id) { - ieNode->net->addOutput(ieNode->node->get_friendly_name()); + ieNode->net->addOutput(ieNode); break; } } @@ -2382,7 +2107,7 @@ struct Net::Impl : public detail::NetImplBase if (!ieNode->net->isInitialized()) { - ieNode->net->setUnconnectedNodes(ieNode); + ieNode->net->addOutput(ieNode); ieNode->net->createNet((Target)preferableTarget); ld.skip = false; } @@ -2430,10 +2155,9 @@ struct Net::Impl : public detail::NetImplBase CV_TRACE_FUNCTION(); CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn()); - MapIdToLayerData::iterator it; Ptr net; - for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; if (ld.id == 0) @@ -2462,7 +2186,7 @@ struct Net::Impl : public detail::NetImplBase // Build WebNN networks from sets of layers that support this // backend. Split a whole model on several WebNN networks if // some of layers are not implemented. 
- for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) { LayerData &ld = it->second; @@ -2662,8 +2386,7 @@ struct Net::Impl : public detail::NetImplBase if (!haveVulkan()) return; - MapIdToLayerData::iterator it = layers.begin(); - for (; it != layers.end(); it++) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { LayerData &ld = it->second; Ptr layer = ld.layerInstance; @@ -2812,7 +2535,7 @@ struct Net::Impl : public detail::NetImplBase ld.inputLayersId.insert(ld.inputBlobsId[i].lid); //allocate parents - for (set::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) + for (set::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) allocateLayer(*i, layersShapes); //bind inputs @@ -2894,16 +2617,21 @@ struct Net::Impl : public detail::NetImplBase if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA && - preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) return; +#if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return; +#endif + // scan through all the layers. 
If there is convolution layer followed by the activation layer, // we try to embed this activation into the convolution and disable separate execution of the activation + + // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)" std::set pinsToKeep(blobsToKeep_.begin(), blobsToKeep_.end()); - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end(); it++) + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) { int lid = it->first; LayerData& ld = layers[lid]; @@ -2925,6 +2653,13 @@ struct Net::Impl : public detail::NetImplBase LayerPin lpNext(ld.consumers[0].lid, 0); while (nextData) { +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0) + { + CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type); + break; + } +#endif /* we use `tryFuse` member of convolution layer to fuse eltwise later * it's not intended to be fused here; hence, we stop when we encounter eltwise */ @@ -3450,8 +3185,7 @@ struct Net::Impl : public detail::NetImplBase { CV_TRACE_FUNCTION(); - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end(); it++) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; CV_Assert(!layers[0].outputBlobs.empty()); @@ -3485,7 +3219,7 @@ struct Net::Impl : public detail::NetImplBase // Fake references to input blobs. 
for (int i = 0; i < layers[0].outputBlobs.size(); ++i) blobManager.addReference(LayerPin(0, i)); - for (it = layers.begin(); it != layers.end(); ++it) + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) { const LayerData& ld = it->second; blobManager.addReferences(ld.inputBlobsId); @@ -3496,7 +3230,7 @@ struct Net::Impl : public detail::NetImplBase blobManager.addReference(blobsToKeep_[i]); } - for (it = layers.begin(); it != layers.end(); it++) + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) { int lid = it->first; allocateLayer(lid, layersShapes); @@ -3517,7 +3251,11 @@ struct Net::Impl : public detail::NetImplBase TickMeter tm; tm.start(); +#ifndef HAVE_VULKAN + std::map >::const_iterator it = ld.backendNodes.find(preferableBackend); +#else std::map >::iterator it = ld.backendNodes.find(preferableBackend); +#endif if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) { if (isAsync) @@ -3699,18 +3437,17 @@ struct Net::Impl : public detail::NetImplBase { forwardHalide(ld.outputBlobsWrappers, node); } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - forwardInfEngine(ld.outputBlobsWrappers, node, isAsync); - } +#ifdef HAVE_INF_ENGINE else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { forwardNgraph(ld.outputBlobsWrappers, node, isAsync); } - else if (preferableBackend == DNN_BACKEND_WEBNN) +#endif + else if (preferableBackend == DNN_BACKEND_WEBNN) { forwardWebnn(ld.outputBlobsWrappers, node, isAsync); } +#ifdef HAVE_VULKAN else if (preferableBackend == DNN_BACKEND_VKCOM) { try @@ -3724,6 +3461,7 @@ struct Net::Impl : public detail::NetImplBase forwardLayer(ld); } } +#endif else { CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); @@ -3748,8 +3486,7 @@ struct Net::Impl : public detail::NetImplBase if (clearFlags) { - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != 
layers.end(); it++) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) it->second.flag = 0; } @@ -3758,8 +3495,7 @@ struct Net::Impl : public detail::NetImplBase return; //forward parents - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) { LayerData &ld = it->second; if (ld.flag) @@ -3845,7 +3581,7 @@ struct Net::Impl : public detail::NetImplBase for(int i = 0; i < inputLayerIds.size(); i++) { int layerId = inputLayerIds[i].lid; - LayersShapesMap::iterator it = + LayersShapesMap::const_iterator it = inOutShapes.find(layerId); if(it == inOutShapes.end() || it->second.out.empty()) @@ -3928,7 +3664,7 @@ struct Net::Impl : public detail::NetImplBase inOutShapes.clear(); inOutShapes[0].in = netInputShapes; //insert shape for first input layer - for (MapIdToLayerData::iterator it = layers.begin(); + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) { getLayerShapesRecursively(it->first, inOutShapes); @@ -3969,12 +3705,11 @@ struct Net::Impl : public detail::NetImplBase CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); LayersShapesMap layersShapes; layersShapes[0].in = inputShapes; - for (MapIdToLayerData::iterator it = layers.begin(); - it != layers.end(); it++) + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { int layerId = it->first; LayerData& layerData = it->second; - std::vector& inputLayerIds = layerData.inputBlobsId; + const std::vector& inputLayerIds = layerData.inputBlobsId; LayerShapes& layerShapes = layersShapes[layerId]; CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); if (layerShapes.in.empty()) @@ -3984,7 +3719,7 @@ struct Net::Impl : public detail::NetImplBase const LayerPin& 
inputPin = inputLayerIds[i]; int inputLayerId = inputPin.lid; CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); - LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId); + LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId); if (inputIt == layersShapes.end() || inputIt->second.out.empty()) { getLayerShapesRecursively(inputLayerId, layersShapes); @@ -4001,19 +3736,23 @@ struct Net::Impl : public detail::NetImplBase CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); } - LayerPin getLatestLayerPin(const std::vector& pins) + LayerPin getLatestLayerPin(const std::vector& pins) const { return *std::max_element(pins.begin(), pins.end()); } - Mat getBlob(const LayerPin& pin) + Mat getBlob(const LayerPin& pin) const { CV_TRACE_FUNCTION(); if (!pin.valid()) CV_Error(Error::StsObjectNotFound, "Requested blob not found"); - LayerData &ld = layers[pin.lid]; + MapIdToLayerData::const_iterator it = layers.find(pin.lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid)); + + const LayerData &ld = it->second; if ((size_t)pin.oid >= ld.outputBlobs.size()) { CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, " @@ -4029,6 +3768,7 @@ struct Net::Impl : public detail::NetImplBase if (ld.outputBlobs[pin.oid].depth() == CV_16S) { + Mat output_blob; convertFp16(ld.outputBlobs[pin.oid], output_blob); return output_blob; } @@ -4036,7 +3776,7 @@ struct Net::Impl : public detail::NetImplBase return ld.outputBlobs[pin.oid]; } - Mat getBlob(String outputName) + Mat getBlob(String outputName) const { return getBlob(getPinByAlias(outputName)); } @@ -4062,27 +3802,13 @@ struct Net::Impl : public detail::NetImplBase // Transfer data to CPU if it's require. 
ld.outputBlobsWrappers[pin.oid]->copyToHost(); } - CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); - return std::move(wrapper->futureMat); + Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); + return std::move(wrapper->futureMat); #else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { -#ifdef HAVE_DNN_NGRAPH - Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); - return std::move(wrapper->futureMat); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); -#endif - } + CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required"); #endif // HAVE_INF_ENGINE - CV_Error(Error::StsNotImplemented, "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 backend is required"); } AsyncArray getBlobAsync(String outputName) @@ -4096,9 +3822,9 @@ struct Net::Impl : public detail::NetImplBase Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); #endif - string dump(); + string dump() const; - void dumpNetworkToFile() + void dumpNetworkToFile() const { #ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP string dumpFileNameBase = getDumpFileNameBase(); @@ -4156,40 +3882,18 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe CV_TRACE_REGION_NEXT("backendNode"); Ptr backendNode; -#ifdef HAVE_DNN_NGRAPH - if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam()) { auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape{}); Ptr 
backendNodeNGraph(new InfEngineNgraphNode(fake_node)); backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); backendNode = backendNodeNGraph; } - else -#endif - { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - Ptr backendNodeNN(new InfEngineBackendNode(InferenceEngine::Builder::Layer(""))); - backendNodeNN->net = Ptr(new InfEngineBackendNet(ieNet)); - backendNode = backendNodeNN; -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif - } CV_TRACE_REGION_NEXT("register_outputs"); -#ifdef HAVE_DNN_NGRAPH auto ngraphFunction = ieNet.getFunction(); -#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2) - std::list< std::shared_ptr > ngraphOperations; -#else - std::vector< std::shared_ptr > ngraphOperations; -#endif - if (ngraphFunction) - { - ngraphOperations = ngraphFunction->get_ops(); - } -#endif + CV_Assert(ngraphFunction); + std::vector< std::shared_ptr > ngraphOperations = ngraphFunction->get_ops(); for (auto& it : ieNet.getOutputsInfo()) { @@ -4201,8 +3905,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe LayerData& ld = cvNet.impl->layers[lid]; -#ifdef HAVE_DNN_NGRAPH - if (DNN_BACKEND_INFERENCE_ENGINE_NGRAPH == getInferenceEngineBackendTypeParam()) { Ptr cvLayer(new NgraphBackendLayer(ieNet)); cvLayer->name = outputName; @@ -4210,44 +3912,18 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe auto process_layer = [&](const std::string& name) -> bool { - if (ngraphFunction) + CV_TRACE_REGION("ngraph_function"); + for (const auto& op : ngraphOperations) { - CV_TRACE_REGION("ngraph_function"); - for (const auto& op : ngraphOperations) + CV_Assert(op); + if (op->get_friendly_name() == name) { - CV_Assert(op); - if (op->get_friendly_name() == name) - { - const std::string typeName = op->get_type_info().name; - cvLayer->type = typeName; - return true; - } - } - return false; - } - else - { -#if 
INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - CV_Error(Error::StsNotImplemented, "This OpenCV version is built with Inference Engine which has dropped IR v7 support"); -#else - CV_TRACE_REGION("legacy_cnn_layer"); - try - { - InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(name.c_str()); - CV_Assert(ieLayer); - - cvLayer->type = ieLayer->type; + const std::string typeName = op->get_type_info().name; + cvLayer->type = typeName; return true; } - catch (const std::exception& e) - { - CV_UNUSED(e); - CV_LOG_DEBUG(NULL, "IE layer extraction failure: '" << name << "' - " << e.what()); - return false; - } -#endif - } + return false; }; bool found = process_layer(outputName); @@ -4266,37 +3942,6 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe ld.layerInstance = cvLayer; ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode; } - else -#endif - { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - Ptr cvLayer(new InfEngineBackendLayer(ieNet)); - - InferenceEngine::CNNLayerPtr ieLayer; - try - { - ieLayer = ieNet.getLayerByName(outputName.c_str()); - } - catch (...) 
- { - auto pos = outputName.rfind('.'); // cut port number: ".0" - if (pos != std::string::npos) - { - std::string layerName = outputName.substr(0, pos); - ieLayer = ieNet.getLayerByName(layerName.c_str()); - } - } - CV_Assert(ieLayer); - - cvLayer->name = outputName; - cvLayer->type = ieLayer->type; - ld.layerInstance = cvLayer; - - ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019] = backendNode; -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif - } for (int i = 0; i < inputsNames.size(); ++i) cvNet.connect(0, i, lid, i); @@ -4304,7 +3949,7 @@ Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNe CV_TRACE_REGION_NEXT("finalize"); - cvNet.setPreferableBackend(getInferenceEngineBackendTypeParam()); + cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); cvNet.impl->skipInfEngineInit = true; return cvNet; @@ -4318,16 +3963,11 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin) CV_UNUSED(xml); CV_UNUSED(bin); CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); #else -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - InferenceEngine::CNNNetReader reader; - reader.ReadNetwork(xml); - reader.ReadWeights(bin); - InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); -#else + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + InferenceEngine::Core& ie = getCore(""); InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); -#endif return Impl::createNetworkFromModelOptimizer(ieNet); #endif // HAVE_INF_ENGINE @@ -4354,26 +3994,8 @@ Net Net::readFromModelOptimizer( CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); #else -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - InferenceEngine::CNNNetReader reader; + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - try - { - 
reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize); - - InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); - InferenceEngine::TBlob::Ptr weightsBlobPtr(new InferenceEngine::TBlob(tensorDesc)); - weightsBlobPtr->allocate(); - std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize); - reader.SetWeights(weightsBlobPtr); - } - catch (const std::exception& e) - { - CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); - } - - InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); -#else InferenceEngine::Core& ie = getCore(""); std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); @@ -4390,7 +4012,6 @@ Net Net::readFromModelOptimizer( { CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); } -#endif return Impl::createNetworkFromModelOptimizer(ieNet); #endif // HAVE_INF_ENGINE @@ -4404,34 +4025,8 @@ Net::~Net() int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) { CV_TRACE_FUNCTION(); - - int id = impl->getLayerId(name); - if (id >= 0) - { - if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") - { - CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); - return -1; - } - else - { - LayerData& ld = impl->layers.find(id)->second; - ld.type = type; - ld.params = params; - return -1; - } - } - - id = ++impl->lastLayerId; - impl->layerNameToId.insert(std::make_pair(name, id)); - impl->layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); - if (params.get("has_dynamic_shapes", false)) - impl->hasDynamicShapes = true; - - if (dtype == CV_8S) - impl->netWasQuantized = true; - - return id; + CV_Assert(impl); + return impl->addLayer(name, type, dtype, params); } int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) @@ -4475,10 +4070,18 @@ void Net::connect(String _outPin, 
String _inPin) impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); } +int Net::registerOutput(const std::string& outputName, int layerId, int outputPort) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->registerOutput(outputName, layerId, outputPort); +} + Mat Net::forward(const String& outputName) { CV_TRACE_FUNCTION(); CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; String layerName = outputName; @@ -4500,6 +4103,7 @@ AsyncArray Net::forwardAsync(const String& outputName) { CV_TRACE_FUNCTION(); CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; #ifdef CV_CXX11 String layerName = outputName; @@ -4514,8 +4118,8 @@ AsyncArray Net::forwardAsync(const String& outputName) std::vector pins(1, impl->getPinByAlias(layerName)); impl->setUpNet(pins); - if (!(impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || impl->preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) - CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backends only"); + if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only"); impl->isAsync = true; impl->forwardToLayer(impl->getLayerData(layerName)); @@ -4531,6 +4135,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) { CV_TRACE_FUNCTION(); CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; String layerName = outputName; @@ -4612,6 +4217,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) @@ -4639,6 +4245,7 @@ void Net::forward(std::vector >& outputBlobs, const std::vector& outBlobNames) { CV_TRACE_FUNCTION(); + FPDenormalsIgnoreHintScope 
fp_denormals_ignore_scope; std::vector pins; for (int i = 0; i < outBlobNames.size(); i++) @@ -4937,7 +4544,7 @@ void Net::setPreferableBackend(int backendId) #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) - backendId = getInferenceEngineBackendTypeParam(); + backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; #endif if( impl->preferableBackend != backendId ) @@ -5000,6 +4607,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; LayerPin pin; pin.lid = 0; @@ -5059,7 +4667,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons impl->netWasAllocated = impl->netWasAllocated && oldShape; } -Mat Net::getParam(LayerId layer, int numParam) +Mat Net::getParam(int layer, int numParam) const { LayerData &ld = impl->getLayerData(layer); std::vector &layerBlobs = ld.getLayerInstance()->blobs; @@ -5067,7 +4675,7 @@ Mat Net::getParam(LayerId layer, int numParam) return layerBlobs[numParam]; } -void Net::setParam(LayerId layer, int numParam, const Mat &blob) +void Net::setParam(int layer, int numParam, const Mat &blob) { LayerData &ld = impl->getLayerData(layer); @@ -5077,7 +4685,7 @@ void Net::setParam(LayerId layer, int numParam, const Mat &blob) layerBlobs[numParam] = blob; } -int Net::getLayerId(const String &layer) +int Net::getLayerId(const String &layer) const { return impl->getLayerId(layer); } @@ -5120,7 +4728,7 @@ String Net::dump() return impl->dump(); } -string Net::Impl::dump() +string Net::Impl::dump() const { bool hasInput = !netInputLayer->inputsData.empty(); @@ -5181,8 +4789,8 @@ string Net::Impl::dump() case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; case DNN_BACKEND_INFERENCE_ENGINE: // fallthru - case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: backend = "DLIE/"; break; - case 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "NGRAPH/"; break; + case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru + case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break; case DNN_BACKEND_OPENCV: backend = "OCV/"; break; case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; case DNN_BACKEND_CUDA: backend = "CUDA/"; break; @@ -5388,13 +4996,18 @@ void Net::dumpToFile(const String& path) { file.close(); } -Ptr Net::getLayer(LayerId layerId) +Ptr Net::getLayer(int layerId) const +{ + LayerData &ld = impl->getLayerData(layerId); + return ld.getLayerInstance(); +} +Ptr Net::getLayer(const LayerId& layerId) const { LayerData &ld = impl->getLayerData(layerId); return ld.getLayerInstance(); } -std::vector > Net::getLayerInputs(LayerId layerId) +std::vector > Net::getLayerInputs(int layerId) const { LayerData &ld = impl->getLayerData(layerId); @@ -5413,7 +5026,7 @@ std::vector Net::getLayerNames() const std::vector res; res.reserve(impl->layers.size()); - Impl::MapIdToLayerData::iterator it; + Impl::MapIdToLayerData::const_iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.id) //skip Data layer @@ -5430,13 +5043,27 @@ bool Net::empty() const std::vector Net::getUnconnectedOutLayers() const { + CV_TRACE_FUNCTION(); + CV_Assert(impl); + std::vector layersIds; - Impl::MapIdToLayerData::iterator it; + // registerOutput() flow + const std::map& outputNameToId = impl->outputNameToId; + if (!outputNameToId.empty()) + { + for (std::map::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it) + { + layersIds.push_back(it->second); + } + return layersIds; + } + + Impl::MapIdToLayerData::const_iterator it; for (it = impl->layers.begin(); it != impl->layers.end(); it++) { int lid = it->first; - LayerData &ld = it->second; + const LayerData &ld = it->second; if (ld.requiredOutputs.size() == 0) layersIds.push_back(lid); @@ -5536,13 +5163,13 @@ int64 Net::getFLOPS(const MatShape& netInputShape) 
const int64 Net::getFLOPS(const int layerId, const std::vector& netInputShapes) const { - Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); + Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); LayerShapes shapes; impl->getLayerShapes(netInputShapes, layerId, shapes); - return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out); + return const_cast(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out); } int64 Net::getFLOPS(const int layerId, @@ -5556,7 +5183,7 @@ void Net::getLayerTypes(std::vector& layersTypes) const layersTypes.clear(); std::map layers; - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); + for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (layers.find(it->second.type) == layers.end()) @@ -5564,7 +5191,7 @@ void Net::getLayerTypes(std::vector& layersTypes) const layers[it->second.type]++; } - for (std::map::iterator it = layers.begin(); + for (std::map::const_iterator it = layers.begin(); it != layers.end(); it++) { layersTypes.push_back(it->first); @@ -5574,7 +5201,7 @@ void Net::getLayerTypes(std::vector& layersTypes) const int Net::getLayersCount(const String& layerType) const { int count = 0; - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); + for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); it != impl->layers.end(); it++) { if (it->second.type == layerType) @@ -5589,7 +5216,7 @@ void Net::getMemoryConsumption(const int layerId, { CV_TRACE_FUNCTION(); - Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); + Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); CV_Assert(layer != impl->layers.end()); weights = blobs = 0; @@ -5658,7 +5285,7 @@ void Net::getMemoryConsumption(const std::vector& netInputShapes, for(int i = 0; i < layerIds.size(); i++) { int w = 0, b = 0; - 
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]); + Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerIds[i]); CV_Assert(layer != impl->layers.end()); for(int j = 0; j < layer->second.params.blobs.size(); j++) @@ -5764,13 +5391,6 @@ Ptr Layer::initHalide(const std::vector > &) return Ptr(); } -Ptr Layer::initInfEngine(const std::vector > &) -{ - CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - Ptr Layer::initNgraph(const std::vector > & inputs, const std::vector >& nodes) { CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + @@ -6092,6 +5712,13 @@ void LayerFactory::unregisterLayer(const String &type) } } +bool LayerFactory::isLayerRegistered(const std::string& type) +{ + cv::AutoLock lock(getLayerFactoryMutex()); + auto& registeredLayers = getLayerFactoryImpl(); + return registeredLayers.find(type) != registeredLayers.end(); +} + Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index ffeb3bfda1..6350a4e168 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -5,8 +5,8 @@ #ifndef __OPENCV_DNN_COMMON_HPP__ #define __OPENCV_DNN_COMMON_HPP__ -#include #include +#include #include @@ -59,7 +59,7 @@ class LayerHandler public: void addMissing(const std::string& name, const std::string& type); bool contains(const std::string& type) const; - void printMissing(); + void printMissing() const; protected: LayerParams getNotImplementedParams(const std::string& name, const std::string& op); @@ -71,12 +71,12 @@ private: struct NetImplBase { const int networkId; // network global identifier - int networkDumpCounter; // dump counter + mutable int networkDumpCounter; // dump counter int dumpLevel; // level of information dumps (initialized through OPENCV_DNN_NETWORK_DUMP parameter) 
NetImplBase(); - std::string getDumpFileNameBase(); + std::string getDumpFileNameBase() const; }; } // namespace detail diff --git a/modules/dnn/src/graph_simplifier.cpp b/modules/dnn/src/graph_simplifier.cpp index a23fce30f5..e58e0e38e8 100644 --- a/modules/dnn/src/graph_simplifier.cpp +++ b/modules/dnn/src/graph_simplifier.cpp @@ -108,7 +108,7 @@ bool Subgraph::match(const Ptr& net, int nodeId, for (int j = 0; j < inputNodes.size(); ++j) { - if (nodes[inputNodes[j]].empty()) // Unknown input node type. + if (nodes[inputNodes[j]].empty() || node->getInputName(j).empty()) // Unknown input node type. continue; nodeId = getInputNodeId(net, node, j); const Ptr inpNode = net->getNode(nodeId); diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index a61766337e..44cd074de4 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -330,7 +330,7 @@ public: InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr&& _node) : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {} -InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr& _node) +InfEngineNgraphNode::InfEngineNgraphNode(const std::shared_ptr& _node) : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {} InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& nodes, @@ -379,16 +379,21 @@ InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEn device_name = "CPU"; } -void InfEngineNgraphNet::addOutput(const std::string& name) +void InfEngineNgraphNet::addOutput(const Ptr& node) { - requestedOutputs.push_back(name); + CV_Assert(node); + CV_Assert(node->node); + const std::string& name = node->node->get_friendly_name(); + requestedOutputs.insert({name, node}); } void InfEngineNgraphNet::setNodePtr(std::shared_ptr* ptr) { all_nodes.emplace((*ptr)->get_friendly_name(), ptr); } - void InfEngineNgraphNet::release() { + void InfEngineNgraphNet::release() + { + // FIXIT release should not be conditional, 
release ALL for (auto& node : components.back()) { #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) { @@ -397,7 +402,6 @@ void InfEngineNgraphNet::setNodePtr(std::shared_ptr* ptr) { #endif auto it = all_nodes.find(node->get_friendly_name()); if (it != all_nodes.end()) { - unconnectedNodes.erase(*(it->second)); it->second->reset(); all_nodes.erase(it); } @@ -422,7 +426,8 @@ void InfEngineNgraphNet::dfs(std::shared_ptr& node, } } -int InfEngineNgraphNet::getNumComponents() { +int InfEngineNgraphNet::getNumComponents() +{ if (!components.empty()) { return components.size(); } @@ -445,17 +450,21 @@ int InfEngineNgraphNet::getNumComponents() { void InfEngineNgraphNet::createNet(Target targetId) { if (!hasNetOwner) { - CV_Assert(!unconnectedNodes.empty()); + CV_Assert(!requestedOutputs.empty()); ngraph::ResultVector outs; - for (auto& node : unconnectedNodes) + + for (auto output_node_it = requestedOutputs.begin(); output_node_it != requestedOutputs.end(); ++output_node_it) { - auto out = std::make_shared(node); + CV_LOG_DEBUG(NULL, "DNN/NGRAPH: Add 'Result' output: " << output_node_it->first); + CV_Assert(output_node_it->second); + auto out = std::make_shared(output_node_it->second->node); outs.push_back(out); } CV_Assert_N(!inputs_vec.empty(), !outs.empty()); ngraph_function = std::make_shared(outs, inputs_vec); int num_comp = getNumComponents(); + CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp); if (num_comp > 1) { for (int i = num_comp - 1; i >= 0; --i) { ngraph::ResultVector outputs; @@ -466,6 +475,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { #else if (node->is_parameter()) { #endif + CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'"); auto parameter = std::dynamic_pointer_cast(node); inps.push_back(parameter); } @@ -474,10 +484,12 @@ void 
InfEngineNgraphNet::createNet(Target targetId) { #else else if (node->is_output()) { #endif + CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'"); auto result = std::dynamic_pointer_cast(node); outputs.push_back(result); } } + CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size()); isInit = false; CV_Assert_N(!inps.empty(), !outputs.empty()); ngraph_function = std::make_shared(outputs, inps); @@ -574,7 +586,7 @@ void InfEngineNgraphNet::init(Target targetId) auto node = ngraph_function->output(i).get_node(); for (size_t j = 0; j < node->get_input_size(); ++j) { std::string name = node->input_value(j).get_node()->get_friendly_name(); - auto iter = std::find(requestedOutputs.begin(), requestedOutputs.end(), name); + auto iter = requestedOutputs.find(name); if (iter != requestedOutputs.end()) { requestedOutputs.erase(iter); cnn.addOutput(name); @@ -582,10 +594,6 @@ void InfEngineNgraphNet::init(Target targetId) } } } - for (const auto& name : requestedOutputs) - { - cnn.addOutput(name); - } for (const auto& it : cnn.getInputsInfo()) { @@ -630,9 +638,6 @@ ngraph::ParameterVector InfEngineNgraphNet::setInputs(const std::vector return current_inp; } -void InfEngineNgraphNet::setUnconnectedNodes(Ptr& node) { - unconnectedNodes.insert(node->node); -} void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net) { @@ -732,10 +737,10 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net) } } } - if (isHetero) - netExec = ie.LoadNetwork(net, "HETERO:" + device_name + ",CPU", config); - else - netExec = ie.LoadNetwork(net, device_name, config); + + std::string ieDevice = isHetero ? 
("HETERO:" + device_name + ",CPU") : device_name; + CV_LOG_INFO(NULL, "DNN/IE: Calling LoadNetwork(device=" << ieDevice << ")..."); + netExec = ie.LoadNetwork(net, ieDevice, config); } catch (const std::exception& ex) { diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 617f1d4542..0d287a22a5 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -37,7 +37,7 @@ public: InfEngineNgraphNet(detail::NetImplBase& netImpl); InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEngine::CNNNetwork& net); - void addOutput(const std::string& name); + void addOutput(const Ptr& node); bool isInitialized(); void init(Target targetId); @@ -47,7 +47,6 @@ public: void initPlugin(InferenceEngine::CNNNetwork& net); ngraph::ParameterVector setInputs(const std::vector& inputs, const std::vector& names); - void setUnconnectedNodes(Ptr& node); void addBlobs(const std::vector >& ptrs); void createNet(Target targetId); @@ -88,8 +87,7 @@ public: InferenceEngine::CNNNetwork cnn; bool hasNetOwner; - std::vector requestedOutputs; - std::unordered_set> unconnectedNodes; + std::unordered_map > requestedOutputs; std::map outputsDesc; }; @@ -102,7 +100,7 @@ public: std::vector& internals); InfEngineNgraphNode(std::shared_ptr&& _node); - InfEngineNgraphNode(std::shared_ptr& _node); + InfEngineNgraphNode(const std::shared_ptr& _node); void setName(const std::string& name); diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index d22a070805..bb81f14425 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -170,11 +170,14 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return preferableTarget == DNN_TARGET_CPU || dims == 4; +#endif return (backendId == DNN_BACKEND_OPENCV) || backendId == DNN_BACKEND_CUDA || (backendId == 
DNN_BACKEND_HALIDE && haveHalide()) || - backendId == DNN_BACKEND_WEBNN || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine() && (preferableTarget == DNN_TARGET_CPU || dims == 4)); + backendId == DNN_BACKEND_WEBNN; } #ifdef HAVE_OPENCL @@ -382,16 +385,6 @@ public: } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::Layer ieLayer = InferenceEngine::Builder::ScaleShiftLayer(name); - const size_t numChannels = weights_.total(); - addConstantData("weights", wrapToInfEngineBlob(weights_, {numChannels}, InferenceEngine::Layout::C), ieLayer); - addConstantData("biases", wrapToInfEngineBlob(bias_, {numChannels}, InferenceEngine::Layout::C), ieLayer); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 59548a9c0c..0d6ab19e4d 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -63,9 +63,12 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()); + backendId == DNN_BACKEND_CUDA; } bool getMemoryShapes(const std::vector &inputs, @@ -116,32 +119,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - std::vector dims = input->getDims(); - 
CV_Assert(!dims.empty()); - - InferenceEngine::Builder::Layer ieLayer(name); - ieLayer.setName(name); - if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) - { - ieLayer.setType("Copy"); - } - else - { - ieLayer.setType("Split"); - ieLayer.getParameters()["axis"] = dims.size() - 1; - ieLayer.getParameters()["out_sizes"] = dims[0]; - } - ieLayer.setInputPorts({InferenceEngine::Port(dims)}); - ieLayer.setOutputPorts(std::vector(1)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index f620d66a39..675546f76f 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -113,11 +113,13 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding) || // By channels - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !padding) || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || (backendId == DNN_BACKEND_WEBNN && !padding) || (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding); } @@ -343,18 +345,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - - InferenceEngine::Builder::ConcatLayer ieLayer(name); - ieLayer.setAxis(normalize_axis(axis, input->getDims().size())); - ieLayer.setInputPorts(std::vector(inputs.size())); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // 
HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/const_layer.cpp b/modules/dnn/src/layers/const_layer.cpp index 1f307b8fa6..4392763be7 100644 --- a/modules/dnn/src/layers/const_layer.cpp +++ b/modules/dnn/src/layers/const_layer.cpp @@ -34,9 +34,11 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || backendId == DNN_BACKEND_WEBNN || backendId == DNN_BACKEND_CUDA; } @@ -78,16 +80,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::ConstLayer ieLayer(name); - ieLayer.setData(wrapToInfEngineBlob(blobs[0])); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index bcc783d8a0..1af34472df 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -330,7 +330,7 @@ public: } #endif #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { bool isArmTarget = preferableTarget == DNN_TARGET_CPU && isArmComputePlugin(); if (isArmTarget && blobs.empty()) @@ -340,7 +340,7 @@ public: if (ksize == 3) return preferableTarget != DNN_TARGET_MYRIAD && !isArmTarget; bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; - if ((backendId == 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || !isMyriad) && blobs.empty()) + if (!isMyriad && blobs.empty()) return false; return (!isMyriad || dilation.width == dilation.height); } @@ -421,7 +421,9 @@ public: if (!blobs.empty()) { Mat wm = blobs[0].reshape(1, numOutput); - if( wm.step1() % VEC_ALIGN != 0 ) + if ((wm.step1() % VEC_ALIGN != 0) || + !isAligned(wm.data) + ) { int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); Mat wm_buffer = Mat(numOutput, newcols, wm.type()); @@ -759,69 +761,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector > &inputs) CV_OVERRIDE - { - CV_Assert(!blobs.empty()); - InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - std::vector dims = input->getDims(); - CV_Assert(dims.size() == 4 || dims.size() == 5); - const int inpCn = dims[1]; - const int outCn = blobs[0].size[0]; - const int inpGroupCn = blobs[0].size[1]; - const int group = inpCn / inpGroupCn; - InferenceEngine::Layout layout = (dims.size() == 4) ? 
InferenceEngine::Layout::OIHW : - InferenceEngine::Layout::NCDHW; - - auto ieWeights = wrapToInfEngineBlob(blobs[0], layout); - if (fusedWeights) - { - if (weightsMat.isContinuous()) - { - Mat cvWeights = weightsMat.reshape(1, blobs[0].dims, blobs[0].size); - ieWeights = wrapToInfEngineBlob(cvWeights, layout); - } - else - { - ieWeights = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP32, - ieWeights->getTensorDesc().getDims(), layout - }); - ieWeights->allocate(); - - Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, outCn); - Mat cvWeights = weightsMat.colRange(0, newWeights.cols); - cvWeights.copyTo(newWeights); - } - } - InferenceEngine::Blob::Ptr ieBiases; - if (hasBias() || fusedBias) - { - Mat biasesMat({outCn}, CV_32F, &biasvec[0]); - ieBiases = wrapToInfEngineBlob(biasesMat, {(size_t)outCn}, InferenceEngine::Layout::C); - } - - InferenceEngine::Builder::ConvolutionLayer ieLayer(name); - - ieLayer.setKernel(kernel_size); - ieLayer.setStrides(strides); - ieLayer.setDilation(dilations); - ieLayer.setPaddingsBegin(pads_begin); - ieLayer.setPaddingsEnd(pads_end); - ieLayer.setGroup((size_t)group); - ieLayer.setOutDepth((size_t)outCn); - - InferenceEngine::Builder::Layer l = ieLayer; - addConstantData("weights", ieWeights, l); - if (ieBiases) - addConstantData("biases", ieBiases, l); - - if (!padMode.empty()) - l.getParameters()["auto_pad"] = padMode == "VALID" ? 
std::string("valid") : std::string("same_upper"); - - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector > &inputs, @@ -1660,7 +1599,6 @@ public: } } } - // now compute dot product of the weights // and im2row-transformed part of the tensor #if CV_TRY_AVX512_SKX @@ -1995,13 +1933,6 @@ public: CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); -#if CV_SSE3 - uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE(); - uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE(); - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#endif - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), forward_ocl(inputs_arr, outputs_arr, internals_arr)) @@ -2138,10 +2069,6 @@ public: ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope, kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes); } -#if CV_SSE3 - _MM_SET_FLUSH_ZERO_MODE(ftzMode); - _MM_SET_DENORMALS_ZERO_MODE(dazMode); -#endif } #ifdef HAVE_CUDA @@ -2329,52 +2256,6 @@ public: if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { return group == 1; } - -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - if (kernel_size.size() == 3 && preferableTarget != DNN_TARGET_CPU) { - return false; - } - - if (std::accumulate(adjust_pads.begin(), adjust_pads.end(), 0, std::plus()) > 0) - { - if (padMode.empty()) - { - if (preferableTarget != DNN_TARGET_CPU && group != 1) - { - for (int i = 0; i < adjust_pads.size(); i++) { - if (adjust_pads[i] && pads_begin[i]) - return false; - } - } - for (int i = 0; i < adjust_pads.size(); i++) { - if (pads_end[i] < adjust_pads[i]) - return false; - } - return true; - } - else if (padMode == "SAME") - { - for (int i = 0; i < adjust_pads.size(); i++) { - if (kernel_size[i] < pads_begin[i] + 1 + adjust_pads[i]) - return false; - } - return true; - } - 
else if (padMode == "VALID") - return false; - } - - if (group != 1) - { - return preferableTarget == DNN_TARGET_CPU; - } - if (preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16) - return std::accumulate(dilations.begin(), dilations.end(), 1, std::multiplies()) == 1; - return true; - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #endif // HAVE_INF_ENGINE { return backendId == DNN_BACKEND_CUDA || @@ -3032,64 +2913,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector > &) CV_OVERRIDE - { - CV_Assert(!blobs.empty()); - InferenceEngine::Layout layout = blobs[0].dims == 5? InferenceEngine::Layout::NCDHW : - InferenceEngine::Layout::OIHW; - - auto ieWeights = wrapToInfEngineBlob(blobs[0], layout); - if (fusedWeights) - { - ieWeights = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP32, - ieWeights->getTensorDesc().getDims(), layout - }); - ieWeights->allocate(); - - int inpCn = blobs[0].size[0]; - Mat newWeights = infEngineBlobToMat(ieWeights).reshape(1, inpCn); - transpose(weightsMat, newWeights); - } - - const int outGroupCn = blobs[0].size[1]; // Weights are in IOHW or OIDHW layout - const int group = numOutput / outGroupCn; - - InferenceEngine::Builder::DeconvolutionLayer ieLayer(name); - - ieLayer.setKernel(kernel_size); - ieLayer.setStrides(strides); - ieLayer.setDilation(dilations); - ieLayer.setPaddingsBegin(pads_begin); - - if (padMode.empty()) - { - std::vector paddings_end; - for (int i = 0; i < pads_end.size(); i++) { - paddings_end.push_back(pads_end[i] - adjust_pads[i]); - } - ieLayer.setPaddingsEnd(paddings_end); - } - else if (padMode == "SAME") - { - std::vector paddings_end; - for (int i = 0; i < pads_begin.size(); i++) { - paddings_end.push_back(kernel_size[i] - pads_begin[i] - 1 - adjust_pads[i]); - } - ieLayer.setPaddingsEnd(paddings_end); - } - ieLayer.setGroup((size_t)group); - ieLayer.setOutDepth((size_t)numOutput); - - 
InferenceEngine::Builder::Layer l = ieLayer; - addConstantData("weights", ieWeights, l); - if (hasBias()) - addConstantData("biases", wrapToInfEngineBlob(biasesMat, {(size_t)numOutput}, InferenceEngine::Layout::C), l); - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector > &inputs, diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 77d86d5652..61d4f44432 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -221,7 +221,7 @@ public: { return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_CUDA && !_groupByClasses) || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && !_locPredTransposed && _bboxesNormalized); + (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized); } bool getMemoryShapes(const std::vector &inputs, @@ -1001,30 +1001,6 @@ public: } #endif -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::DetectionOutputLayer ieLayer(name); - - ieLayer.setNumClasses(_numClasses); - ieLayer.setShareLocation(_shareLocation); - ieLayer.setBackgroudLabelId(_backgroundLabelId); - ieLayer.setNMSThreshold(_nmsThreshold); - ieLayer.setTopK(_topK > 0 ? _topK : _keepTopK); - ieLayer.setKeepTopK(_keepTopK); - ieLayer.setConfidenceThreshold(_confidenceThreshold); - ieLayer.setVariantEncodedInTarget(_varianceEncodedInTarget); - ieLayer.setCodeType("caffe.PriorBoxParameter." 
+ _codeType); - ieLayer.setInputPorts(std::vector(3)); - - InferenceEngine::Builder::Layer l = ieLayer; - l.getParameters()["eta"] = std::string("1.0"); - l.getParameters()["clip"] = _clip; - - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index bfabef9d68..0085ba7449 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -186,14 +186,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::Layer ieLayer = func.initInfEngineBuilderAPI(); - ieLayer.setName(this->name); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE @@ -341,10 +333,6 @@ struct ReLUFunctor : public BaseFunctor bool supportBackend(int backendId, int) { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - return slope >= 0 || !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1); -#endif #ifdef HAVE_DNN_NGRAPH if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; @@ -462,13 +450,6 @@ struct ReLUFunctor : public BaseFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(slope); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) { @@ -534,11 +515,14 @@ struct ReLU6Functor : public BaseFunctor bool supportBackend(int backendId, int) { +#ifdef HAVE_INF_ENGINE + 
if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_WEBNN || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_WEBNN; } void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const @@ -620,12 +604,6 @@ struct ReLU6Functor : public BaseFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::ClampLayer("").setMinValue(minValue).setMaxValue(maxValue); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -743,12 +721,6 @@ struct BaseDefaultFunctor : public BaseFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - CV_Error(Error::StsNotImplemented, ""); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -782,10 +754,13 @@ struct TanHFunctor : public BaseDefaultFunctor bool supportBackend(int backendId, int) { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE; } inline float calculate(float x) const @@ -808,13 +783,6 @@ struct TanHFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::TanHLayer(""); - } 
-#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) { @@ -937,10 +905,13 @@ struct SigmoidFunctor : public BaseDefaultFunctor bool supportBackend(int backendId, int) { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE; } inline float calculate(float x) const @@ -963,12 +934,6 @@ struct SigmoidFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::SigmoidLayer(""); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -992,10 +957,13 @@ struct ELUFunctor : public BaseDefaultFunctor bool supportBackend(int backendId, int) { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE; } inline float calculate(float x) const @@ -1023,13 +991,6 @@ struct ELUFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::ELULayer(""); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) { @@ -1050,8 +1011,8 @@ struct AbsValFunctor : public BaseDefaultFunctor bool supportBackend(int 
backendId, int) { #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return !INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2019R1); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || @@ -1078,12 +1039,6 @@ struct AbsValFunctor : public BaseDefaultFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::ReLULayer("").setNegativeSlope(-0.999999f); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -1930,14 +1885,15 @@ struct PowerFunctor : public BaseFunctor bool supportBackend(int backendId, int targetId) { - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - return (targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) || power == 1.0 || power == 0.5; +#ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; - else +#endif + { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE; + } } void finalize() @@ -2029,14 +1985,6 @@ struct PowerFunctor : public BaseFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - return InferenceEngine::Builder::PowerLayer("").setPower(power) - .setScale(scale) - .setShift(shift); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) @@ -2189,10 +2137,13 @@ struct ChannelsPReLUFunctor : public BaseFunctor bool supportBackend(int backendId, int) { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == 
DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_HALIDE || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_HALIDE; } void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const @@ -2282,15 +2233,6 @@ struct ChannelsPReLUFunctor : public BaseFunctor } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - InferenceEngine::Builder::Layer initInfEngineBuilderAPI() - { - InferenceEngine::Builder::Layer l = InferenceEngine::Builder::PReLULayer(""); - const size_t numChannels = scale.total(); - addConstantData("weights", wrapToInfEngineBlob(scale, {numChannels}, InferenceEngine::Layout::C), l); - return l; - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH std::shared_ptr initNgraphAPI(const std::shared_ptr& node) diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 2c473ff412..43d925055c 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -164,6 +164,11 @@ public: if (hasVecInput && ELTWISE_CHANNNELS_SAME) return backendId == DNN_BACKEND_OPENCV; +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return channelsMode == ELTWISE_CHANNNELS_SAME; +#endif + if (backendId == DNN_BACKEND_CUDA) { if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE) @@ -172,9 +177,8 @@ public: } return backendId == DNN_BACKEND_OPENCV || - (backendId == DNN_BACKEND_HALIDE && op != DIV) || // TODO: not implemented, see PR #15811 - ((((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty())) - || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && channelsMode == ELTWISE_CHANNNELS_SAME)); + (backendId == DNN_BACKEND_HALIDE && op != DIV) // TODO: not implemented, see PR #15811 + ; } bool 
getMemoryShapes(const std::vector &inputs, @@ -837,34 +841,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::Builder::EltwiseLayer ieLayer(name); - - ieLayer.setInputPorts(std::vector(inputs.size())); - - if (op == SUM) - ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM); - else if (op == PROD) - ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL); - else if (op == DIV) - ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV); - else if (op == MAX) - ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX); - else if (op == MIN) - ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MIN); - else - CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); - - InferenceEngine::Builder::Layer l = ieLayer; - if (!coeffs.empty()) - l.getParameters()["coeff"] = coeffs; - - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index 69bc422ee3..b3f57dc7cd 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -72,9 +72,12 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()); + backendId == DNN_BACKEND_CUDA; } bool getMemoryShapes(const std::vector &inputs, @@ -171,25 +174,10 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr 
initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::Builder::Layer ieLayer(name); - ieLayer.setName(name); - ieLayer.setType("Flatten"); - ieLayer.getParameters()["axis"] = (size_t)_startAxis; - ieLayer.getParameters()["end_axis"] = _endAxis; // Do not cast to size_t because it might be negative. - ieLayer.setInputPorts(std::vector(1)); - ieLayer.setOutputPorts(std::vector(1)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH -virtual Ptr initNgraph(const std::vector >& inputs, - const std::vector >& nodes) CV_OVERRIDE -{ + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { auto& ieInpNode = nodes[0].dynamicCast()->node; std::vector dims = ieInpNode->get_shape(); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 99acba908b..5d0ad5fde7 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -148,12 +148,15 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return axis == 1; +#endif + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1) || - (backendId == DNN_BACKEND_WEBNN && axis == 1) || - (((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && !blobs.empty()) || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && axis == 1); + (backendId == DNN_BACKEND_WEBNN && axis == 1); } virtual bool setActivation(const Ptr& layer) CV_OVERRIDE @@ -570,23 +573,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::FullyConnectedLayer ieLayer(name); - - const int outNum = blobs[0].size[0]; - 
ieLayer.setOutputNum(outNum); - - InferenceEngine::Builder::Layer l = ieLayer; - addConstantData("weights", wrapToInfEngineBlob(blobs[0], {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1], 1, 1}, InferenceEngine::Layout::OIHW), l); - if (bias) - addConstantData("biases", wrapToInfEngineBlob(blobs[1], {(size_t)outNum}, InferenceEngine::Layout::C), l); - - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index 67a4b3c065..fd88a3c3d2 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -81,6 +81,8 @@ void fastConv( const float* weights, size_t wstep, const float* bias, int blockSize, int vecsize, int vecsize_aligned, const float* relu, bool initOutput ) { + CV_Assert(isAligned<32>(weights)); + int outCn = outShape[1]; size_t outPlaneSize = outShape[2]*outShape[3]; float r0 = 1.f, r1 = 1.f, r2 = 1.f; diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 224441b0e7..6c3a654159 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -99,12 +99,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return bias == (int)bias; - } - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return bias == (int)bias; - } +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || @@ -444,24 +442,6 @@ public: #endif // HAVE_HALIDE } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - float alphaSize = alpha; - if (!normBySize) - alphaSize *= (type == 
SPATIAL_NRM ? size*size : size); - - InferenceEngine::Builder::NormLayer ieLayer(name); - ieLayer.setSize(size); - ieLayer.setAlpha(alphaSize); - ieLayer.setBeta(beta); - ieLayer.setAcrossMaps(type == CHANNEL_NRM); - - InferenceEngine::Builder::Layer l = ieLayer; - l.getParameters()["k"] = bias; - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index 455f4997df..dc23656b7a 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -124,14 +124,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; - return !zeroDev && (!isMyriad || eps <= 1e-7f); - } -#endif -#ifdef HAVE_DNN_NGRAPH +#ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; #endif @@ -387,16 +380,6 @@ public: } } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::MVNLayer ieLayer(name); - ieLayer.setAcrossChannels(acrossChannels); - ieLayer.setNormalize(normVariance); - ieLayer.setEpsilon(eps); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index 236f2e43f1..2017d76801 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -70,17 +70,15 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - if (backendId 
== DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { if (pnorm != 2) return false; - bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && isMyriad) - return !acrossSpatial; - return startAxis == 1; } +#endif return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_CUDA && (pnorm == 1 || pnorm == 2)); } @@ -270,58 +268,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - std::vector dims = input->getDims(); - if (dims.size() == 4) - { - InferenceEngine::Builder::NormalizeLayer ieLayer(name); - - ieLayer.setChannelShared(false); - ieLayer.setAcrossMaps(acrossSpatial); - ieLayer.setEpsilon(epsilon); - - InferenceEngine::Builder::Layer l = ieLayer; - const int numChannels = dims[1]; - InferenceEngine::Blob::Ptr weights; - if (blobs.empty()) - { - weights = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP32, - {(size_t)numChannels}, InferenceEngine::Layout::C - }); - weights->allocate(); - - Mat weightsMat = infEngineBlobToMat(weights).reshape(1, numChannels); - Mat(numChannels, 1, CV_32F, Scalar(1)).copyTo(weightsMat); - l.getParameters()["channel_shared"] = false; - } - else - { - CV_Assert(numChannels == blobs[0].total()); - weights = wrapToInfEngineBlob(blobs[0], {(size_t)numChannels}, InferenceEngine::Layout::C); - l.getParameters()["channel_shared"] = blobs[0].total() == 1; - } - addConstantData("weights", weights, l); - l.getParameters()["across_spatial"] = acrossSpatial; - return Ptr(new InfEngineBackendNode(l)); - } - else - { - InferenceEngine::Builder::GRNLayer ieLayer(name); - ieLayer.setBeta(epsilon); - - InferenceEngine::Builder::Layer l = ieLayer; - 
l.getParameters()["bias"] = epsilon; - - return Ptr(new InfEngineBackendNode(l)); - } - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/not_implemented_layer.cpp b/modules/dnn/src/layers/not_implemented_layer.cpp index c4b1343902..3fd52c09d9 100644 --- a/modules/dnn/src/layers/not_implemented_layer.cpp +++ b/modules/dnn/src/layers/not_implemented_layer.cpp @@ -87,11 +87,6 @@ public: CV_Error(Error::StsNotImplemented, msg); } - virtual Ptr initInfEngine(const std::vector > &inputs) CV_OVERRIDE - { - CV_Error(Error::StsNotImplemented, msg); - } - virtual Ptr initNgraph(const std::vector > &inputs, const std::vector >& nodes) CV_OVERRIDE { diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index 7534145f53..aea8ab3168 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -102,10 +102,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; - if (INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && isMyriad) + if (isMyriad) return dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0; return (dstRanges.size() <= 4 || !isArmComputePlugin()); @@ -219,30 +219,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::Layer ieLayer(name); - ieLayer.setName(name); - ieLayer.setType("Pad"); - - std::vector begins(paddings.size(), 0), ends(paddings.size(), 0); - for (int i = 0; i < paddings.size(); ++i) - { - 
begins[i] = paddings[i].first; - ends[i] = paddings[i].second; - } - ieLayer.getParameters()["pads_begin"] = begins; - ieLayer.getParameters()["pads_end"] = ends; - ieLayer.getParameters()["pad_mode"] = paddingType; - if (paddingType == "constant") - ieLayer.getParameters()["pad_value"] = paddingValue; - - ieLayer.setInputPorts(std::vector(1)); - ieLayer.setOutputPorts(std::vector(1)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 9e66eb6a64..033b3d9aee 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -115,13 +115,16 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && preferableTarget == DNN_TARGET_CPU) - return _order.size() <= 4 || !isArmComputePlugin(); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (preferableTarget == DNN_TARGET_CPU) + return _order.size() <= 4 || !isArmComputePlugin(); + return true; + } #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_WEBNN || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) || (backendId == DNN_BACKEND_VKCOM && haveVulkan()); } @@ -418,16 +421,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::PermuteLayer ieLayer(name); - ieLayer.setOrder(_order); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/pooling_layer.cpp 
b/modules/dnn/src/layers/pooling_layer.cpp index 7cb86a9515..f8616a4184 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -199,34 +199,13 @@ public: { return type == MAX || type == AVE || type == ROI; } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - if (computeMaxIdx) - return false; - if (kernel_size.size() == 3) - return preferableTarget == DNN_TARGET_CPU; - if (kernel_size.size() == 1) - return false; - if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) { -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - if (type == MAX && (pads_begin[1] == 1 && pads_begin[0] == 1) && (strides[0] == 2 && strides[1] == 2)) { - return !isMyriadX(); - } -#endif - return type == MAX || type == AVE; - } - else - return type != STOCHASTIC && type != SUM; - } -#endif +#ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { -#ifdef HAVE_DNN_NGRAPH return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); -#endif } - else if (backendId == DNN_BACKEND_OPENCV) +#endif + if (backendId == DNN_BACKEND_OPENCV) { if (kernel_size.size() == 3) return preferableTarget == DNN_TARGET_CPU; @@ -550,54 +529,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - if (type == MAX || type == AVE) - { - InferenceEngine::Builder::PoolingLayer ieLayer(name); - - ieLayer.setKernel(kernel_size); - ieLayer.setStrides(strides); - ieLayer.setPaddingsBegin(pads_begin); - ieLayer.setPaddingsEnd(pads_end); - - ieLayer.setPoolingType(type == MAX ? - InferenceEngine::Builder::PoolingLayer::PoolingType::MAX : - InferenceEngine::Builder::PoolingLayer::PoolingType::AVG); - ieLayer.setRoundingType(ceilMode ? 
- InferenceEngine::Builder::PoolingLayer::RoundingType::CEIL : - InferenceEngine::Builder::PoolingLayer::RoundingType::FLOOR); - ieLayer.setExcludePad(!avePoolPaddedArea); - - InferenceEngine::Builder::Layer l = ieLayer; - if (!padMode.empty()) - l.getParameters()["auto_pad"] = padMode == "VALID" ? std::string("valid") : std::string("same_upper"); - return Ptr(new InfEngineBackendNode(l)); - } - else if (type == ROI) - { - InferenceEngine::Builder::ROIPoolingLayer ieLayer(name); - ieLayer.setSpatialScale(spatialScale); - ieLayer.setPooled({pooledSize.height, pooledSize.width}); - ieLayer.setInputPorts(std::vector(2)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } - else if (type == PSROI) - { - InferenceEngine::Builder::PSROIPoolingLayer ieLayer(name); - ieLayer.setSpatialScale(spatialScale); - ieLayer.setOutputDim(psRoiOutChannels); - ieLayer.setGroupSize(pooledSize.width); - ieLayer.setInputPorts(std::vector(2)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } - else - CV_Error(Error::StsNotImplemented, "Unsupported pooling type"); - return Ptr(); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp index f7340b1e67..160b36c18d 100644 --- a/modules/dnn/src/layers/prior_box_layer.cpp +++ b/modules/dnn/src/layers/prior_box_layer.cpp @@ -298,9 +298,7 @@ public: #endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && - ( _explicitSizes || (_minSize.size() == 1 && _maxSize.size() <= 1))) - || (backendId == DNN_BACKEND_VKCOM && haveVulkan()); + (backendId == DNN_BACKEND_VKCOM && haveVulkan()); } bool getMemoryShapes(const std::vector &inputs, @@ -510,69 +508,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - if 
(_explicitSizes) - { - InferenceEngine::Builder::PriorBoxClusteredLayer ieLayer(name); - ieLayer.setSteps({_stepY, _stepX}); - - CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], ""); - ieLayer.setOffset(_offsetsX[0]); - - ieLayer.setClip(_clip); - ieLayer.setFlip(false); // We already flipped aspect ratios. - - InferenceEngine::Builder::Layer l = ieLayer; - - CV_Assert_N(!_boxWidths.empty(), !_boxHeights.empty(), !_variance.empty()); - CV_Assert(_boxWidths.size() == _boxHeights.size()); - l.getParameters()["width"] = _boxWidths; - l.getParameters()["height"] = _boxHeights; - l.getParameters()["variance"] = _variance; - return Ptr(new InfEngineBackendNode(l)); - } - else - { - InferenceEngine::Builder::PriorBoxLayer ieLayer(name); - - CV_Assert(!_explicitSizes); - ieLayer.setMinSize(_minSize[0]); - if (!_maxSize.empty()) - ieLayer.setMaxSize(_maxSize[0]); - - CV_CheckEQ(_offsetsX.size(), (size_t)1, ""); CV_CheckEQ(_offsetsY.size(), (size_t)1, ""); CV_CheckEQ(_offsetsX[0], _offsetsY[0], ""); - ieLayer.setOffset(_offsetsX[0]); - - ieLayer.setClip(_clip); - ieLayer.setFlip(false); // We already flipped aspect ratios. 
- - InferenceEngine::Builder::Layer l = ieLayer; - if (_stepX == _stepY) - { - l.getParameters()["step"] = _stepX; - l.getParameters()["step_h"] = 0.0f; - l.getParameters()["step_w"] = 0.0f; - } - else - { - l.getParameters()["step"] = 0.0f; - l.getParameters()["step_h"] = _stepY; - l.getParameters()["step_w"] = _stepX; - } - if (!_aspectRatios.empty()) - { - l.getParameters()["aspect_ratio"] = _aspectRatios; - } - CV_Assert(!_variance.empty()); - l.getParameters()["variance"] = _variance; - return Ptr(new InfEngineBackendNode(l)); - } - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index aeb5d44a47..e9edcf1547 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -96,7 +96,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; return !isMyriad; @@ -338,32 +338,6 @@ public: layerOutputs[0].col(2).copyTo(dst); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::ProposalLayer ieLayer(name); - - ieLayer.setBaseSize(baseSize); - ieLayer.setFeatStride(featStride); - ieLayer.setMinSize(16); - ieLayer.setNMSThresh(nmsThreshold); - ieLayer.setPostNMSTopN(keepTopAfterNMS); - ieLayer.setPreNMSTopN(keepTopBeforeNMS); - - std::vector scalesVec(scales.size()); - for (int i = 0; i < scales.size(); ++i) - scalesVec[i] = scales.get(i); - ieLayer.setScale(scalesVec); - - std::vector ratiosVec(ratios.size()); - for (int i = 0; i < 
ratios.size(); ++i) - ratiosVec[i] = ratios.get(i); - ieLayer.setRatio(ratiosVec); - - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 14210becb4..dbb3e2700a 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -184,7 +184,7 @@ public: CV_Assert(!reverse || !bidirectional); // read activations - DictValue activations = params.get("activations", ""); + DictValue activations = params.get("activations", DictValue(String())); if (activations.size() == 1) // if activations wasn't specified use default { f_activation = sigmoid; diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index 797df4819d..ac7d1abfb1 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -151,10 +151,12 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + backendId == DNN_BACKEND_CUDA; } #ifdef HAVE_OPENCL @@ -198,16 +200,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::ReorgYoloLayer ieLayer(name); - ieLayer.setStride(reorgStride); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector > &inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/reshape_layer.cpp 
b/modules/dnn/src/layers/reshape_layer.cpp index 0ba3abf047..f62235dc20 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -202,10 +202,13 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || - backendId == DNN_BACKEND_WEBNN || - ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()); + backendId == DNN_BACKEND_WEBNN; } bool getMemoryShapes(const std::vector &inputs, @@ -306,17 +309,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::Builder::ReshapeLayer ieLayer(name); - CV_Assert(outShapes.size() == 1); - ieLayer.setDims(outShapes[0]); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 42eb2e2331..ab640dbf3f 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -78,7 +78,7 @@ public: return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear"; #ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { return (interpolation == "nearest" && scaleWidth == scaleHeight) || (interpolation == "bilinear"); @@ -308,38 +308,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::Layer ieLayer(name); 
- ieLayer.setName(name); - if (interpolation == "nearest") - { - ieLayer.setType("Resample"); - ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST"); - ieLayer.getParameters()["antialias"] = false; - if (scaleWidth != scaleHeight) - CV_Error(Error::StsNotImplemented, "resample with sw != sh"); - ieLayer.getParameters()["factor"] = 1.0f / scaleWidth; - } - else if (interpolation == "bilinear") - { - ieLayer.setType("Interp"); - ieLayer.getParameters()["pad_beg"] = 0; - ieLayer.getParameters()["pad_end"] = 0; - ieLayer.getParameters()["align_corners"] = alignCorners; - } - else - CV_Error(Error::StsNotImplemented, "Unsupported interpolation: " + interpolation); - ieLayer.getParameters()["width"] = outWidth; - ieLayer.getParameters()["height"] = outHeight; - ieLayer.setInputPorts(std::vector(1)); - ieLayer.setOutputPorts(std::vector(1)); - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index fcee451556..d727734cf3 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -78,11 +78,13 @@ public: { return backendId == DNN_BACKEND_OPENCV; } +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return axis > 0; +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_HALIDE || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && axis == 1 && !blobs.empty()) || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && axis > 0) || (backendId == DNN_BACKEND_WEBNN && axis >0); } @@ -314,34 +316,6 @@ public: } #endif // HAVE_HALIDE -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE - { - InferenceEngine::Builder::Layer l = 
InferenceEngine::Builder::ScaleShiftLayer(name); - - CV_Assert(!blobs.empty()); - const size_t numChannels = blobs[0].total(); - if (hasWeights) - { - addConstantData("weights", wrapToInfEngineBlob(blobs[0], {numChannels}, InferenceEngine::Layout::C), l); - } - else - { - auto weights = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP32, {(size_t)numChannels}, - InferenceEngine::Layout::C - }); - weights->allocate(); - float* buf = weights->buffer().as(); - std::fill(buf, buf + numChannels, 1); - addConstantData("weights", weights, l); - } - if (hasBias) - addConstantData("biases", wrapToInfEngineBlob(blobs.back(), {numChannels}, InferenceEngine::Layout::C), l); - return Ptr(new InfEngineBackendNode(l)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 20b493636a..4370e566a5 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -166,12 +166,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && - sliceRanges.size() == 1 && sliceRanges[0].size() == 4 && !hasSteps; -#endif -#ifdef HAVE_DNN_NGRAPH +#ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return sliceRanges.size() == 1 && !hasSteps; #endif @@ -573,64 +568,6 @@ public: } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - CV_Assert_N(finalSliceRanges.size() == 1, inputs.size() <= 2); - - std::vector axes, offsets, dims; - int from, to, step; - int numDims = finalSliceRanges[0].size(); - if (preferableTarget == 
DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) - { - from = axis; - to = numDims; - step = 1; - } - else - { - from = numDims - 1; - to = axis - 1; - step = -1; - } - for (int i = from; i != to; i += step) - { - axes.push_back(i); - offsets.push_back(finalSliceRanges[0][i].start); - dims.push_back(finalSliceRanges[0][i].size()); - } - - InferenceEngine::Builder::Layer ieLayer(name); - ieLayer.setName(name); - ieLayer.setType("Crop"); - ieLayer.getParameters()["axis"] = axes; - ieLayer.getParameters()["dim"] = dims; - ieLayer.getParameters()["offset"] = offsets; - ieLayer.setInputPorts(std::vector(2)); - ieLayer.setOutputPorts(std::vector(1)); - - if (inputs.size() != 2) - { - std::vector outShape(numDims); - for (int i = 0; i < numDims; ++i) - outShape[i] = finalSliceRanges[0][i].size(); - - ieLayer.getInputPorts()[1].setParameter("type", "weights"); - - auto shapeSource = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP32, outShape, - InferenceEngine::Layout::ANY - }); - shapeSource->allocate(); - addConstantData("weights", shapeSource, ieLayer); - } - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif -#endif - - #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index db2951808f..790f181325 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -99,6 +99,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return true; +#endif #ifdef HAVE_WEBNN if (backendId == DNN_BACKEND_WEBNN) { // TODO: support logSoftMax @@ -112,8 +116,6 @@ public: return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axisRaw == 1) || - backendId == 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && haveInfEngine() && !logSoftMax) || (backendId == DNN_BACKEND_VKCOM && haveVulkan()); } @@ -360,17 +362,6 @@ public: return Ptr(); } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - virtual Ptr initInfEngine(const std::vector >& inputs) CV_OVERRIDE - { - InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); - - InferenceEngine::Builder::SoftMaxLayer ieLayer(name); - ieLayer.setAxis(normalize_axis(axisRaw, input->getDims().size())); - - return Ptr(new InfEngineBackendNode(ieLayer)); - } -#endif // HAVE_DNN_IE_NN_BUILDER_2019 #ifdef HAVE_DNN_NGRAPH virtual Ptr initNgraph(const std::vector >& inputs, diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index a2b28462e8..7cfc546b12 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -10,6 +10,8 @@ #include +#include + #include #undef CV_LOG_STRIP_LEVEL #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 @@ -48,8 +50,12 @@ CV__DNN_INLINE_NS_BEGIN extern bool DNN_DIAGNOSTICS_RUN; +class ONNXLayerHandler; + class ONNXImporter { + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + opencv_onnx::ModelProto model_proto; struct LayerInfo { int layerId; @@ -80,7 +86,7 @@ public: void populateNet(); protected: - std::unique_ptr missingLayerHandler; + std::unique_ptr layerHandler; Net& dstNet; opencv_onnx::GraphProto graph_proto; @@ -94,15 +100,19 @@ protected: std::map layer_id; typedef std::map::iterator IterLayerId_t; + typedef std::map::const_iterator ConstIterLayerId_t; void handleNode(const opencv_onnx::NodeProto& node_proto); private: + friend class ONNXLayerHandler; typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); typedef std::map DispatchMap; typedef std::map DomainDispatchMap; DomainDispatchMap domain_dispatch_map; + std::string getLayerTypeDomain(const 
opencv_onnx::NodeProto& node_proto); + const DispatchMap& getDispatchMap(const opencv_onnx::NodeProto& node_proto); void buildDispatchMap_ONNX_AI(int opset_version); void buildDispatchMap_COM_MICROSOFT(int opset_version); @@ -156,6 +166,7 @@ private: void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSimpleLayers (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); // Domain: com.microsoft // URL: https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md @@ -178,9 +189,38 @@ private: const std::string str_domain_ai_onnx = "ai.onnx"; }; +class ONNXLayerHandler : public detail::LayerHandler +{ +public: + explicit ONNXLayerHandler(ONNXImporter* importer_); + + void fillRegistry(const opencv_onnx::GraphProto& net); + +protected: + ONNXImporter* importer; +}; + +ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){} + +void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net) +{ + int layersSize = net.node_size(); + for (int li = 0; li < layersSize; li++) { + const opencv_onnx::NodeProto &node_proto = net.node(li); + const std::string& name = node_proto.output(0); + const std::string& type = node_proto.op_type(); + const std::string& layer_type_domain = importer->getLayerTypeDomain(node_proto); + const auto& dispatch = importer->getDispatchMap(node_proto); + if (dispatch.find(type) == dispatch.end()) + { + addMissing(name, cv::format("%s.%s", layer_type_domain.c_str(), type.c_str())); + } + } + printMissing(); +} ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) - : missingLayerHandler(DNN_DIAGNOSTICS_RUN ? new detail::LayerHandler() : nullptr) + : layerHandler(DNN_DIAGNOSTICS_RUN ? 
new ONNXLayerHandler(this) : nullptr) , dstNet(net) , onnx_opset(0) { @@ -203,7 +243,7 @@ ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) } ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : missingLayerHandler(DNN_DIAGNOSTICS_RUN ? new detail::LayerHandler() : nullptr) + : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr) , dstNet(net) , onnx_opset(0) { @@ -517,7 +557,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams, int id = dstNet.addLayer(layerParams.name, layerParams.type, depth, layerParams); for (int i = 0; i < node_proto.output_size(); ++i) { - layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i))); + const std::string& output_name = node_proto.output(i); + if (!output_name.empty()) + { + layer_id.insert(std::make_pair(output_name, LayerInfo(id, i))); + } } std::vector layerInpShapes, layerOutShapes, layerInternalShapes; @@ -540,7 +584,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams, layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) { - outShapes[node_proto.output(i)] = layerOutShapes[i]; + const std::string& output_name = node_proto.output(i); + if (!output_name.empty()) + { + outShapes[node_proto.output(i)] = layerOutShapes[i]; + } } } @@ -795,6 +843,7 @@ void ONNXImporter::populateNet() if (DNN_DIAGNOSTICS_RUN) { CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!"); + layerHandler->fillRegistry(graph_proto); } for(int li = 0; li < layersSize; li++) @@ -803,57 +852,92 @@ void ONNXImporter::populateNet() handleNode(node_proto); } + // register outputs + for (int i = 0; i < graph_proto.output_size(); ++i) + { + const std::string& output_name = graph_proto.output(i).name(); + if (output_name.empty()) + { + CV_LOG_ERROR(NULL, "DNN/ONNX: can't register output without name: " << i); + continue; + } + ConstIterLayerId_t layerIt = 
layer_id.find(output_name); + if (layerIt == layer_id.end()) + { + CV_LOG_ERROR(NULL, "DNN/ONNX: can't find layer for output name: '" << output_name << "'. Does model imported properly?"); + continue; + } + + const LayerInfo& li = layerIt->second; + int outputId = dstNet.registerOutput(output_name, li.layerId, li.outputId); CV_UNUSED(outputId); + // no need to duplicate message from engine: CV_LOG_DEBUG(NULL, "DNN/ONNX: registered output='" << output_name << "' with id=" << outputId); + } + CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" : "DNN/ONNX: import completed!")); } +std::string ONNXImporter::getLayerTypeDomain(const opencv_onnx::NodeProto& node_proto) +{ + if (!node_proto.has_domain()) + return str_domain_ai_onnx; + const std::string& domain = node_proto.domain(); + if (domain.empty()) + return str_domain_ai_onnx; + return domain; +} + +const ONNXImporter::DispatchMap& ONNXImporter::getDispatchMap(const opencv_onnx::NodeProto& node_proto) +{ + static DispatchMap empty_map; + const std::string& layer_type_domain = getLayerTypeDomain(node_proto); + auto it = domain_dispatch_map.find(layer_type_domain); + if (it == domain_dispatch_map.end()) + { + return empty_map; + } + + return it->second; +} + +const std::string& extractNodeName(const opencv_onnx::NodeProto& node_proto) +{ + if (node_proto.has_name() && !node_proto.name().empty()) + { + return node_proto.name(); + } + for (int i = 0; i < node_proto.output_size(); ++i) + { + const std::string& name = node_proto.output(i); + // There are two ways to leave an optional input or output unspecified: + // the first, available only for trailing inputs and outputs, is to simply not provide that input; + // the second method is to use an empty string in place of an input or output name. 
+ if (!name.empty()) + { + return name; + } + } + CV_Error(Error::StsAssert, "Couldn't deduce Node name."); +} + void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) { CV_Assert(node_proto.output_size() >= 1); - const std::string& name = node_proto.output(0); + const std::string& name = extractNodeName(node_proto); const std::string& layer_type = node_proto.op_type(); - const std::string& layer_type_domain = [&]() + const std::string& layer_type_domain = getLayerTypeDomain(node_proto); + const auto& dispatch = getDispatchMap(node_proto); + + CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " + << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) + << cv::format(" from %sdomain='", onnx_opset_map.count(layer_type_domain) == 1 ? "" : "undeclared ") + << layer_type_domain << "'" + ); + + if (dispatch.empty()) { - if (!node_proto.has_domain()) - return str_domain_ai_onnx; - const std::string& domain = node_proto.domain(); - if (domain.empty()) - return str_domain_ai_onnx; - return domain; - }(); - const auto& dispatch = [&]() - { - if (layer_type_domain != str_domain_ai_onnx) - { - if (onnx_opset_map.find(layer_type_domain) == onnx_opset_map.end()) - { - CV_LOG_WARNING(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) - << " from undeclared domain='" << layer_type_domain << "'" - ); - } - else - { - CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) - << " from domain='" << layer_type_domain << "'" - ); - } - auto it = domain_dispatch_map.find(layer_type_domain); - if (it == domain_dispatch_map.end()) - { - CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map 
for domain='" << layer_type_domain << "'"); - return DispatchMap(); - } - return it->second; - } - else - { - CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) - ); - return domain_dispatch_map[str_domain_ai_onnx]; - } - }(); + CV_LOG_WARNING(NULL, "DNN/ONNX: missing dispatch map for domain='" << layer_type_domain << "'"); + } LayerParams layerParams; try @@ -1007,6 +1091,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); + const std::string output_name = node_proto.output(0); CV_Assert(node_proto.input_size() == 1); layerParams.type = "Pooling"; @@ -1127,7 +1212,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); + node_proto.set_output(0, output_name); } else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) { @@ -1160,7 +1245,7 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size())); node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); + node_proto.set_output(0, output_name); } addLayer(layerParams, node_proto); } @@ -1188,6 +1273,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP CV_Assert(starts.size() == ends.size()); if (axis > 0) { + CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit begin.resize(axis, 0); end.resize(axis, -1); } @@ -1250,7 +1336,7 @@ void 
ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP { Mat flipped; flip(inp, flipped, 0); - addConstant(layerParams.name, flipped); + addConstant(node_proto.output(0), flipped); return; } } @@ -1270,7 +1356,7 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP inputs.push_back(inp); runLayer(layerParams, inputs, sliced); CV_Assert(sliced.size() == 1); - addConstant(layerParams.name, sliced[0]); + addConstant(node_proto.output(0), sliced[0]); return; } addLayer(layerParams, node_proto); @@ -1335,7 +1421,7 @@ void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodePr Mat blob_1 = getBlob(node_proto, 1); CV_Assert(blob_0.size == blob_1.size); Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1); - addConstant(layerParams.name, output); + addConstant(node_proto.output(0), output); return; } else if (is_const_0 || is_const_1) @@ -1451,12 +1537,13 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No { CV_Assert(node_proto.input_size() == 0); CV_Assert(layerParams.blobs.size() == 1); - addConstant(layerParams.name, layerParams.blobs[0]); + addConstant(node_proto.output(0), layerParams.blobs[0]); } void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; + const std::string output_name = node_proto.output(0); LayerParams lstmParams = layerParams; lstmParams.name += "/lstm"; @@ -1470,6 +1557,13 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. 
const int numFeatures = Wx.size[2]; + // Following checks are deduced from the IFGO->IGFO loop below + // Wx is numDirs X numHidden*3 X numFeatures + // Wh is numDirs X numHidden*3 X numHidden + CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have beat least 3x hidden_size in dimension 1"); + CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have be at least 3x hidden_size in dimension 1"); + CV_CheckLE(numHidden, Wh.size[2], "Wh should have be at least hidden_size in dimension 2"); + Mat h0, c0; if (!node_proto.input(5).empty()) { h0 = getBlob(node_proto, 5); @@ -1491,6 +1585,9 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; + // b is numDirs X numHidden*3 + CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns"); + // IFGO->IGFO for (int k = 0; k < numDirs; ++k) { @@ -1538,13 +1635,14 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); node_proto.set_input(0, lstmParams.name); // redirect input to LSTM - node_proto.set_output(0, layerParams.name); // keep origin LSTM's name + node_proto.set_output(0, output_name); // keep origin LSTM's name addLayer(layerParams, node_proto); } void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; + const std::string output_name = node_proto.output(0); LayerParams gruParams = layerParams; gruParams.name += "/gru"; @@ -1578,7 +1676,7 @@ void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodePro layerParams.type = "Reshape"; layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size())); node_proto.set_input(0, gruParams.name); // redirect input to GRU - node_proto.set_output(0, layerParams.name); // keep origin GRU's name + 
node_proto.set_output(0, output_name); // keep origin GRU's name addLayer(layerParams, node_proto); } @@ -1852,6 +1950,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); + const std::string output_name = node_proto.output(0); CV_Assert(node_proto.input_size() == 2); bool isDiv = layer_type == "Div"; @@ -1936,7 +2035,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro if (inp0.dims == 1 && inp1.dims == 1) out.dims = 1; // to workaround dims == 1 - addConstant(layerParams.name, out); + addConstant(output_name, out); return; } else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) @@ -1952,7 +2051,7 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(1)); proto.add_input(node_proto.input(0)); - proto.add_output(layerParams.name); + proto.add_output(output_name); node_proto = proto; } @@ -2131,7 +2230,7 @@ void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::N std::vector inputs(1, getBlob(node_proto, 0)), transposed; runLayer(layerParams, inputs, transposed); CV_Assert(transposed.size() == 1); - addConstant(layerParams.name, transposed[0]); + addConstant(node_proto.output(0), transposed[0]); return; } addLayer(layerParams, node_proto); @@ -2183,7 +2282,7 @@ void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::Nod Mat inp = getBlob(node_proto, 0); Mat out = inp.reshape(1, outShape); out.dims = outShape.size(); // to workaround dims == 1 - addConstant(layerParams.name, out); + addConstant(node_proto.output(0), out); return; } int depth = layerParams.get("depth", CV_32F); @@ -2212,7 +2311,7 @@ void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::Nod } Mat output = input.reshape(1, 2, out_size); - 
addConstant(layerParams.name, output); + addConstant(node_proto.output(0), output); return; } IterShape_t shapeIt = outShapes.find(node_proto.input(0)); @@ -2284,7 +2383,7 @@ void ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::N } Mat out = input.reshape(0, dims); - addConstant(layerParams.name, out); + addConstant(node_proto.output(0), out); return; } @@ -2323,6 +2422,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node CV_CheckEQ(node_proto.input_size(), 2, ""); const std::string& input0 = node_proto.input(0); const std::string& input1 = node_proto.input(1); + const std::string output_name = node_proto.output(0); Mat newShapeMat = getBlob(input1); MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); @@ -2392,7 +2492,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); output = output.reshape(0, targetShape); - addConstant(layerParams.name, output); + addConstant(output_name, output); return; } @@ -2422,7 +2522,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node layerParams.set("axis", broadcast_axes[0]); layerParams.type = "Concat"; - node_proto.set_output(0, layerParams.name); + node_proto.set_output(0, output_name); } else if (broadcast_axes.empty()) { @@ -2448,7 +2548,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod if (layer_id.find(node_proto.input(0)) == layer_id.end()) { std::vector inputs(1, getBlob(node_proto, 0)), outputs; runLayer(layerParams, inputs, outputs); - addConstant(layerParams.name, outputs[0]); + addConstant(node_proto.output(0), outputs[0]); return; } } @@ -2462,7 +2562,7 @@ void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::Nod if (layer_id.find(node_proto.input(0)) == layer_id.end()) { Mat 
input = getBlob(node_proto, 0); Mat out = input.reshape(0, dim); - addConstant(layerParams.name, out); + addConstant(node_proto.output(0), out); return; } replaceLayerParam(layerParams, "shape", "dim"); @@ -2514,11 +2614,9 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP if (isDynamicShape) { CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0))); - // FIXIT repair assertion - // Disabled to pass face detector tests from #20422 - // CV_Assert(!isDynamicShape); // not supported + CV_Assert(!isDynamicShape); // not supported } - addConstant(layerParams.name, shapeMat); + addConstant(node_proto.output(0), shapeMat); } void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) @@ -2542,7 +2640,7 @@ void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodePr Mat dst; blob.convertTo(dst, type); dst.dims = blob.dims; - addConstant(layerParams.name, dst); + addConstant(node_proto.output(0), dst); return; } else @@ -2569,7 +2667,7 @@ void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx for (int i = 0; i < inpShape.size(); i++) CV_CheckGT(inpShape[i], 0, ""); Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value)); - addConstant(layerParams.name, tensor); + addConstant(node_proto.output(0), tensor); } void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) @@ -2597,7 +2695,7 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node } else { out.dims = 1; } - addConstant(layerParams.name, out); + addConstant(node_proto.output(0), out); return; } else @@ -2691,7 +2789,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node runLayer(layerParams, inputs, concatenated); CV_Assert(concatenated.size() == 1); - addConstant(layerParams.name, concatenated[0]); + 
addConstant(node_proto.output(0), concatenated[0]); return; } else @@ -2740,16 +2838,20 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node // opset-10: input = [X, scales] // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes] + // opset-13: may have empty input, [X, "", "", sizes] or [x, "", scales] int scalesInputId = node_proto.input_size() == 2 ? 1 : 2; + const std::string& scale_name = node_proto.input(scalesInputId); + Mat scales; + if(!scale_name.empty()) + scales = getBlob(node_proto, scalesInputId); - Mat scales = getBlob(node_proto, scalesInputId); if (!scales.empty()) { CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected"); layerParams.set("zoom_factor_y", scales.at(2)); layerParams.set("zoom_factor_x", scales.at(3)); } - else if (node_proto.input_size() >= 4) // opset-11 + else if (node_proto.input_size() >= 4) // opset-11 [x, roi, scales, sizes] or opset-13: input = [X, "", "", sizes] { const std::string& inputSizes = node_proto.input(3); if (constBlobs.find(inputSizes) != constBlobs.end()) @@ -2871,6 +2973,15 @@ void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::Node addLayer(layerParams, node_proto); } +void ONNXImporter::parseSimpleLayers(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + for (int j = 0; j < node_proto.input_size(); j++) { + if (layer_id.find(node_proto.input(j)) == layer_id.end()) + layerParams.blobs.push_back(getBlob(node_proto, j)); + } + addLayer(layerParams, node_proto); +} + void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { const std::string& name = layerParams.name; @@ -2886,20 +2997,11 @@ void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx: } } - CV_LOG_INFO(NULL, "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + CV_LOG_IF_INFO(NULL, 
!LayerFactory::isLayerRegistered(layer_type), "DNN/ONNX: unknown node type, try using custom handler for node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); - if (missingLayerHandler) - { - missingLayerHandler->addMissing(layerParams.name, layerParams.type); - } - - for (int j = 0; j < node_proto.input_size(); j++) { - if (layer_id.find(node_proto.input(j)) == layer_id.end()) - layerParams.blobs.push_back(getBlob(node_proto, j)); - } - addLayer(layerParams, node_proto); + parseSimpleLayers(layerParams, node_proto); } void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) @@ -3349,6 +3451,15 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; dispatch["CumSum"] = &ONNXImporter::parseCumSum; + std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", + "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", + "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax", + "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"}; + for (const auto& name : simpleLayers) + { + dispatch[name] = &ONNXImporter::parseSimpleLayers; + } + // ai.onnx: opset 10+ dispatch["QuantizeLinear"] = dispatch["DequantizeLinear"] = &ONNXImporter::parseQuantDequant; dispatch["QLinearConv"] = &ONNXImporter::parseQConv; diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index d9b98404c3..2899545c6d 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -20,52 +20,17 @@ namespace cv { namespace dnn { #ifdef HAVE_INF_ENGINE -static Backend parseInferenceEngineBackendType(const cv::String& backend) -{ - CV_Assert(!backend.empty()); - if (backend == CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; - if (backend == CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API) - return DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; - CV_Error(Error::StsBadArg, cv::format("Unknown IE backend: %s", backend.c_str())); -} -static const char* dumpInferenceEngineBackendType(Backend backend) -{ - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - return CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API; - CV_Error(Error::StsBadArg, cv::format("Invalid backend ID for IE: %d", backend)); -} -Backend& getInferenceEngineBackendTypeParam() -{ - static Backend param = parseInferenceEngineBackendType( - utils::getConfigurationParameterString("OPENCV_DNN_BACKEND_INFERENCE_ENGINE_TYPE", -#ifdef HAVE_DNN_NGRAPH - CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH -#elif defined(HAVE_DNN_IE_NN_BUILDER_2019) - CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API -#else -#error "Build configuration error: nGraph or NN Builder API backend should be enabled" -#endif - ) - ); - return param; -} - CV__DNN_INLINE_NS_BEGIN cv::String getInferenceEngineBackendType() { - return dumpInferenceEngineBackendType(getInferenceEngineBackendTypeParam()); + return "NGRAPH"; } cv::String setInferenceEngineBackendType(const cv::String& newBackendType) { - Backend newBackend = parseInferenceEngineBackendType(newBackendType); - Backend& param = getInferenceEngineBackendTypeParam(); - Backend old = param; - param = newBackend; - return dumpInferenceEngineBackendType(old); + if (newBackendType != "NGRAPH") + CV_Error(Error::StsNotImplemented, cv::format("DNN/IE: only NGRAPH backend is supported: %s", newBackendType.c_str())); + return newBackendType; } CV__DNN_INLINE_NS_END @@ -98,508 +63,6 @@ void infEngineBlobsToMats(const std::vector& blobs, } -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - -// For networks with input layer which has an empty name, IE generates a name id[some_number]. 
-// OpenCV lets users use an empty input name and to prevent unexpected naming, -// we can use some predefined name. -static std::string kDefaultInpLayerName = "empty_inp_layer_name"; -static std::string kOpenCVLayersType = "OpenCVLayer"; - -static std::string shapesToStr(const std::vector& mats) -{ - std::ostringstream shapes; - shapes << mats.size() << " "; - for (const Mat& m : mats) - { - shapes << m.dims << " "; - for (int i = 0; i < m.dims; ++i) - shapes << m.size[i] << " "; - } - return shapes.str(); -} - -static void strToShapes(const std::string& str, std::vector >& shapes) -{ - std::istringstream ss(str); - int num, dims; - ss >> num; - shapes.resize(num); - for (int i = 0; i < num; ++i) - { - ss >> dims; - shapes[i].resize(dims); - for (int j = 0; j < dims; ++j) - ss >> shapes[i][j]; - } -} - -class InfEngineCustomLayer : public InferenceEngine::ILayerExecImpl -{ -public: - explicit InfEngineCustomLayer(const InferenceEngine::CNNLayer& layer) : cnnLayer(layer) - { - std::istringstream iss(layer.GetParamAsString("impl")); - size_t ptr; - iss >> ptr; - cvLayer = (Layer*)ptr; - - std::vector > shapes; - strToShapes(layer.GetParamAsString("internals"), shapes); - internals.resize(shapes.size()); - for (int i = 0; i < shapes.size(); ++i) - internals[i].create(std::vector(shapes[i].begin(), shapes[i].end()), CV_32F); - } - - virtual InferenceEngine::StatusCode execute(std::vector& inputs, - std::vector& outputs, - InferenceEngine::ResponseDesc *resp) noexcept - { - std::vector inpMats, outMats; - infEngineBlobsToMats(inputs, inpMats); - infEngineBlobsToMats(outputs, outMats); - - try - { - cvLayer->forward(inpMats, outMats, internals); - return InferenceEngine::StatusCode::OK; - } - catch (...) 
- { - return InferenceEngine::StatusCode::GENERAL_ERROR; - } - } - - virtual InferenceEngine::StatusCode - getSupportedConfigurations(std::vector& conf, - InferenceEngine::ResponseDesc* resp) noexcept - { - std::vector inDataConfig; - std::vector outDataConfig; - for (auto& it : cnnLayer.insData) - { - InferenceEngine::DataConfig conf; - conf.desc = it.lock()->getTensorDesc(); - inDataConfig.push_back(conf); - } - - for (auto& it : cnnLayer.outData) - { - InferenceEngine::DataConfig conf; - conf.desc = it->getTensorDesc(); - outDataConfig.push_back(conf); - } - - InferenceEngine::LayerConfig layerConfig; - layerConfig.inConfs = inDataConfig; - layerConfig.outConfs = outDataConfig; - - conf.push_back(layerConfig); - return InferenceEngine::StatusCode::OK; - } - - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, - InferenceEngine::ResponseDesc *resp) noexcept - { - return InferenceEngine::StatusCode::OK; - } - -private: - InferenceEngine::CNNLayer cnnLayer; - dnn::Layer* cvLayer; - std::vector internals; -}; - -class InfEngineCustomLayerShapeInfer : public InferenceEngine::IShapeInferImpl -{ -public: - InferenceEngine::StatusCode - inferShapes(const std::vector& inBlobs, - const std::map& params, - const std::map& blobs, - std::vector& outShapes, - InferenceEngine::ResponseDesc* desc) noexcept override - { - strToShapes(params.at("outputs"), outShapes); - return InferenceEngine::StatusCode::OK; - } -}; - -class InfEngineCustomLayerFactory : public InferenceEngine::ILayerImplFactory { -public: - explicit InfEngineCustomLayerFactory(const InferenceEngine::CNNLayer* layer) : cnnLayer(*layer) {} - - InferenceEngine::StatusCode - getImplementations(std::vector& impls, - InferenceEngine::ResponseDesc* resp) noexcept override { - impls.push_back(std::make_shared(cnnLayer)); - return InferenceEngine::StatusCode::OK; - } - -private: - InferenceEngine::CNNLayer cnnLayer; -}; - -InferenceEngine::StatusCode InfEngineExtension::getFactoryFor( - 
InferenceEngine::ILayerImplFactory*& factory, - const InferenceEngine::CNNLayer* cnnLayer, - InferenceEngine::ResponseDesc* resp -) noexcept -{ - if (cnnLayer->type != kOpenCVLayersType) - return InferenceEngine::StatusCode::NOT_IMPLEMENTED; - factory = new InfEngineCustomLayerFactory(cnnLayer); - return InferenceEngine::StatusCode::OK; -} - -InfEngineBackendNode::InfEngineBackendNode(const InferenceEngine::Builder::Layer& _layer) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019), layer(_layer) {} - - InfEngineBackendNode::InfEngineBackendNode(Ptr& cvLayer_, std::vector& inputs, - std::vector& outputs, - std::vector& internals) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019), layer(cvLayer_->name), - cvLayer(cvLayer_) -{ - CV_Assert(!cvLayer->name.empty()); - layer.setName(cvLayer->name); - layer.setType(kOpenCVLayersType); - layer.getParameters()["impl"] = (size_t)cvLayer.get(); - layer.getParameters()["outputs"] = shapesToStr(outputs); - layer.getParameters()["internals"] = shapesToStr(internals); - layer.setInputPorts(std::vector(inputs.size())); - layer.setOutputPorts(std::vector(outputs.size())); -} - -static std::vector > -infEngineWrappers(const std::vector >& ptrs) -{ - std::vector > wrappers(ptrs.size()); - for (int i = 0; i < ptrs.size(); ++i) - { - CV_Assert(!ptrs[i].empty()); - wrappers[i] = ptrs[i].dynamicCast(); - CV_Assert(!wrappers[i].empty()); - } - return wrappers; -} - -InfEngineBackendNet::InfEngineBackendNet() : netBuilder("") -{ - hasNetOwner = false; - device_name = "CPU"; -} - -InfEngineBackendNet::InfEngineBackendNet(InferenceEngine::CNNNetwork& net) : netBuilder(""), cnn(net) -{ - hasNetOwner = true; - device_name = "CPU"; -} - -void InfEngineBackendNet::connect(const std::vector >& inputs, - const std::vector >& outputs, - const std::string& layerName) -{ - std::vector > inpWrappers = infEngineWrappers(inputs); - std::map::iterator it = layers.find(layerName); - CV_Assert(it != layers.end()); - - const int 
layerId = it->second; - for (size_t i = 0; i < inpWrappers.size(); ++i) - { - const auto& inp = inpWrappers[i]; - const std::string& inpName = inp->dataPtr->getName(); - - std::string inpLayerName = inpName; - size_t inpPortId = inpName.rfind('.'); - if (inpPortId != std::string::npos) - { - std::string portIdStr = inpName.substr(inpPortId + 1); - if (std::all_of(portIdStr.begin(), portIdStr.end(), ::isdigit)) - { - inpLayerName = inpName.substr(0, inpPortId); - inpPortId = atoi(portIdStr.c_str()); - } - else - inpPortId = 0; - } - else - inpPortId = 0; - - int inpId; - it = layers.find(inpLayerName); - if (it == layers.end()) - { - InferenceEngine::Builder::InputLayer inpLayer(!inpLayerName.empty() ? inpLayerName : kDefaultInpLayerName); - std::vector shape(inp->blob->getTensorDesc().getDims()); - inpLayer.setPort(InferenceEngine::Port(shape)); - inpId = netBuilder.addLayer(inpLayer); - - layers.insert({inpName, inpId}); - } - else - inpId = it->second; - - netBuilder.connect({(size_t)inpId, inpPortId}, {(size_t)layerId, i}); - unconnectedPorts.erase({inpId, inpPortId}); - } - CV_Assert(!outputs.empty()); - for (int i = 0; i < outputs.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = infEngineDataNode(outputs[i]); - std::string outputName = outputs.size() > 1 ? (layerName + "." + std::to_string(i)) : layerName; -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - dataPtr->name = outputName; -#else - dataPtr->setName(outputName); -#endif - } -} - -void InfEngineBackendNet::init(Target targetId) -{ - if (!hasNetOwner) - { - CV_Assert(!unconnectedPorts.empty()); - for (const auto& port : unconnectedPorts) - { - InferenceEngine::Builder::OutputLayer outLayer("myconv1"); -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - // Inference Engine determines network precision by ports. - InferenceEngine::Precision p = (targetId == DNN_TARGET_MYRIAD || - targetId == DNN_TARGET_HDDL || - targetId == DNN_TARGET_OPENCL_FP16) ? 
- InferenceEngine::Precision::FP16 : - InferenceEngine::Precision::FP32; - outLayer.setPort(InferenceEngine::Port({}, p)); -#endif - netBuilder.addLayer({InferenceEngine::PortInfo(port.first, port.second)}, outLayer); - } - netBuilder.getContext().addShapeInferImpl(kOpenCVLayersType, - std::make_shared()); - cnn = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(netBuilder.build())); - } - - switch (targetId) - { - case DNN_TARGET_CPU: - device_name = "CPU"; - break; - case DNN_TARGET_OPENCL: - case DNN_TARGET_OPENCL_FP16: - device_name = "GPU"; - break; - case DNN_TARGET_MYRIAD: - device_name = "MYRIAD"; - break; - case DNN_TARGET_HDDL: - device_name = "HDDL"; - break; - case DNN_TARGET_FPGA: - device_name = "FPGA"; - break; - default: - CV_Error(Error::StsNotImplemented, "Unknown target"); - }; - - for (const auto& name : requestedOutputs) - { - cnn.addOutput(name); - } - - for (const auto& it : cnn.getInputsInfo()) - { - const std::string& name = it.first; - auto blobIt = allBlobs.find(name); - CV_Assert(blobIt != allBlobs.end()); - it.second->setPrecision(blobIt->second->getTensorDesc().getPrecision()); - } - for (const auto& it : cnn.getOutputsInfo()) - { - const std::string& name = it.first; - auto blobIt = allBlobs.find(name); - CV_Assert(blobIt != allBlobs.end()); - it.second->setPrecision(blobIt->second->getTensorDesc().getPrecision()); // Should be always FP32 - } - - initPlugin(cnn); -} - -void InfEngineBackendNet::addLayer(InferenceEngine::Builder::Layer& layer) -{ -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - // Add weights to network and connect them after input blobs. 
- std::map& params = layer.getParameters(); - std::vector blobsIds; - std::vector portIds; - for (const std::string& name : {"weights", "biases"}) - { - bool asInput = false; - int portId = 0; - for (int i = 0; i < layer.getInputPorts().size(); ++i) - { - const auto& port = layer.getInputPorts()[i]; - auto it = port.getParameters().find("type"); - if (it != port.getParameters().end() && it->second == name) - { - portId = i; - asInput = true; - break; - } - } - - if (!asInput) - continue; - - auto it = params.find(name); - if (it != params.end()) - { - InferenceEngine::Blob::Ptr blob = it->second.as(); - params.erase(it); - int blobId = netBuilder.addLayer(InferenceEngine::Builder::ConstLayer(name).setData(blob)); - blobsIds.push_back(blobId); - portIds.push_back(portId); - } - } -#endif - - int id = netBuilder.addLayer(layer); - const std::string& layerName = layer.getName(); - - CV_Assert(layers.insert({layerName, id}).second); - for (int i = 0; i < layer.getOutputPorts().size(); ++i) - unconnectedPorts.insert({id, i}); - -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - // By default, all the weights are connected to last ports ids. 
- for (int i = 0; i < blobsIds.size(); ++i) - { - netBuilder.connect((size_t)blobsIds[i], {(size_t)id, (size_t)portIds[i]}); - } -#endif -} - -void InfEngineBackendNet::addOutput(const std::string& name) -{ - requestedOutputs.push_back(name); -} - -static InferenceEngine::Layout estimateLayout(const Mat& m) -{ - if (m.dims == 4) - return InferenceEngine::Layout::NCHW; - else if (m.dims == 2) - return InferenceEngine::Layout::NC; - else - return InferenceEngine::Layout::ANY; -} - -static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std::string& name = "") -{ - std::vector shape = getShape(m); - if (m.type() == CV_32F) - return InferenceEngine::DataPtr(new InferenceEngine::Data(name, - {InferenceEngine::Precision::FP32, shape, estimateLayout(m)})); - else if (m.type() == CV_8U) - return InferenceEngine::DataPtr(new InferenceEngine::Data(name, - {InferenceEngine::Precision::U8, shape, estimateLayout(m)})); - else - CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type())); -} - -InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector& shape, - InferenceEngine::Layout layout) -{ - if (m.type() == CV_32F) - return InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::FP32, shape, layout}, (float*)m.data); - else if (m.type() == CV_8U) - return InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::U8, shape, layout}, (uint8_t*)m.data); - else - CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type())); -} - -InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout) -{ - std::vector shape = getShape(m); - return wrapToInfEngineBlob(m, shape, layout); -} - -InferenceEngine::Blob::Ptr cloneBlob(const InferenceEngine::Blob::Ptr& blob) -{ - InferenceEngine::Blob::Ptr copy; - auto description = blob->getTensorDesc(); - InferenceEngine::Precision precision = description.getPrecision(); - if (precision == 
InferenceEngine::Precision::FP32) - { - copy = InferenceEngine::make_shared_blob(description); - } - else if (precision == InferenceEngine::Precision::U8) - { - copy = InferenceEngine::make_shared_blob(description); - } - else - CV_Error(Error::StsNotImplemented, "Unsupported blob precision"); - copy->allocate(); - return copy; -} - -InferenceEngine::DataPtr infEngineDataNode(const Ptr& ptr) -{ - CV_Assert(!ptr.empty()); - Ptr p = ptr.dynamicCast(); - CV_Assert(!p.empty()); - return p->dataPtr; -} - -InfEngineBackendWrapper::InfEngineBackendWrapper(int targetId, const cv::Mat& m) - : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, targetId) -{ - dataPtr = wrapToInfEngineDataNode(m); - blob = wrapToInfEngineBlob(m, estimateLayout(m)); -} - -InfEngineBackendWrapper::InfEngineBackendWrapper(Ptr wrapper) - : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, wrapper->targetId) -{ - Ptr ieWrapper = wrapper.dynamicCast(); - CV_Assert(!ieWrapper.empty()); - InferenceEngine::DataPtr srcData = ieWrapper->dataPtr; - - dataPtr = InferenceEngine::DataPtr(new InferenceEngine::Data(srcData->getName(), srcData->getTensorDesc())); - blob = ieWrapper->blob; -} - -Ptr InfEngineBackendWrapper::create(Ptr wrapper) -{ - return Ptr(new InfEngineBackendWrapper(wrapper)); -} - -InfEngineBackendWrapper::~InfEngineBackendWrapper() -{ - -} - -void InfEngineBackendWrapper::copyToHost() -{ - -} - -void InfEngineBackendWrapper::setHostDirty() -{ - -} - -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) -static std::map& getSharedPlugins() -{ - static std::map sharedPlugins; - return sharedPlugins; -} -#else static bool init_IE_plugins() { // load and hold IE plugins @@ -653,7 +116,7 @@ InferenceEngine::Core& getCore(const std::string& id) : create_IE_Core_instance(id); return core; } -#endif + static bool detectArmPlugin_() { @@ -672,10 +135,10 @@ static bool detectArmPlugin_() } #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) 
-static bool detectMyriadX_(std::string device) +static bool detectMyriadX_(const std::string& device) { AutoLock lock(getInitializationMutex()); -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) + // Lightweight detection InferenceEngine::Core& ie = getCore(device); const std::vector devices = ie.GetAvailableDevices(); @@ -689,481 +152,22 @@ static bool detectMyriadX_(std::string device) } } return false; -#else - InferenceEngine::Builder::Network builder(""); - InferenceEngine::idx_t inpId = builder.addLayer( - InferenceEngine::Builder::InputLayer().setPort(InferenceEngine::Port({1}))); - -#if INF_ENGINE_RELEASE <= 2018050000 - InferenceEngine::idx_t clampId; - { - InferenceEngine::Builder::Layer l = InferenceEngine::Builder::ClampLayer(); - auto& blobs = l.getConstantData(); - auto blob = InferenceEngine::make_shared_blob( - InferenceEngine::Precision::FP16, - InferenceEngine::Layout::C, {1}); - blob->allocate(); - blobs[""] = blob; - clampId = builder.addLayer({inpId}, l); - } - builder.addLayer({InferenceEngine::PortInfo(clampId)}, InferenceEngine::Builder::OutputLayer()); -#else - - InferenceEngine::idx_t clampId = builder.addLayer({inpId}, InferenceEngine::Builder::ClampLayer()); - builder.addLayer({InferenceEngine::PortInfo(clampId)}, - InferenceEngine::Builder::OutputLayer().setPort(InferenceEngine::Port({}, - InferenceEngine::Precision::FP16))); -#endif - - InferenceEngine::CNNNetwork cnn = InferenceEngine::CNNNetwork( - InferenceEngine::Builder::convertToICNNNetwork(builder.build())); - -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - InferenceEngine::InferenceEnginePluginPtr enginePtr; - { - auto& sharedPlugins = getSharedPlugins(); - auto pluginIt = sharedPlugins.find(device); - if (pluginIt != sharedPlugins.end()) { - enginePtr = pluginIt->second; - } else { - auto dispatcher = InferenceEngine::PluginDispatcher({""}); - enginePtr = dispatcher.getPluginByDevice(device); - sharedPlugins[device] = enginePtr; - } - } - auto plugin = 
InferenceEngine::InferencePlugin(enginePtr); - try - { - auto netExec = plugin.LoadNetwork(cnn, {{"VPU_PLATFORM", "VPU_2480"}}); -#else - try - { -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_PLATFORM", "VPU_2480"}}); -#else - auto netExec = getCore(device).LoadNetwork(cnn, device, {{"VPU_MYRIAD_PLATFORM", "VPU_MYRIAD_2480"}}); -#endif -#endif - auto infRequest = netExec.CreateInferRequest(); - } catch(...) { - return false; - } - return true; -#endif } #endif // !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - -void InfEngineBackendNet::initPlugin(InferenceEngine::CNNNetwork& net) -{ - CV_Assert(!isInitialized()); - - try - { - AutoLock lock(getInitializationMutex()); -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - auto& sharedPlugins = getSharedPlugins(); - auto pluginIt = sharedPlugins.find(device_name); - if (pluginIt != sharedPlugins.end()) - { - enginePtr = pluginIt->second; - } - else -#else - InferenceEngine::Core& ie = getCore(device_name); -#endif - { -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - auto dispatcher = InferenceEngine::PluginDispatcher({""}); - if (device_name == "FPGA") - enginePtr = dispatcher.getPluginByDevice("HETERO:FPGA,CPU"); - else - enginePtr = dispatcher.getPluginByDevice(device_name); - sharedPlugins[device_name] = enginePtr; -#else - isInit = true; -#endif - std::vector candidates; - std::string param_pluginPath = utils::getConfigurationParameterString("OPENCV_DNN_IE_EXTRA_PLUGIN_PATH", ""); - if (!param_pluginPath.empty()) - { - candidates.push_back(param_pluginPath); - } -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - if (device_name == "CPU" || device_name == "FPGA") - { - std::string suffixes[] = {"_avx2", "_sse4", ""}; - bool haveFeature[] = { - checkHardwareSupport(CPU_AVX2), - checkHardwareSupport(CPU_SSE4_2), - true - }; - for (int i = 0; i < 3; ++i) - { - if (!haveFeature[i]) - continue; 
-#ifdef _WIN32 - candidates.push_back("cpu_extension" + suffixes[i] + ".dll"); -#elif defined(__APPLE__) - candidates.push_back("libcpu_extension" + suffixes[i] + ".so"); // built as loadable module - candidates.push_back("libcpu_extension" + suffixes[i] + ".dylib"); // built as shared library -#else - candidates.push_back("libcpu_extension" + suffixes[i] + ".so"); -#endif // _WIN32 - } - } -#endif - bool found = false; - for (size_t i = 0; i != candidates.size(); ++i) - { - const std::string& libName = candidates[i]; - try - { - InferenceEngine::IExtensionPtr extension = - InferenceEngine::make_so_pointer(libName); - -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - enginePtr->AddExtension(extension, 0); -#else - ie.AddExtension(extension, "CPU"); -#endif - CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << libName); - found = true; - break; - } - catch(...) {} - } - if (!found && !candidates.empty()) - { - CV_LOG_WARNING(NULL, "DNN-IE: Can't load extension plugin (extra layers for some networks). Specify path via OPENCV_DNN_IE_EXTRA_PLUGIN_PATH parameter"); - } - // Some of networks can work without a library of extra layers. -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2019R1) - // OpenCV fallbacks as extensions. - try - { - ie.AddExtension(std::make_shared(), "CPU"); - } - catch(const std::exception& e) - { - CV_LOG_INFO(NULL, "DNN-IE: Can't register OpenCV custom layers extension: " << e.what()); - } -#endif - // Limit the number of CPU threads. 
-#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) -#ifndef _WIN32 - enginePtr->SetConfig({{ - InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()), - }}, 0); -#endif // _WIN32 -#else - if (device_name == "CPU") - ie.SetConfig({{ - InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, format("%d", getNumThreads()), - }}, device_name); -#endif - } -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - plugin = InferenceEngine::InferencePlugin(enginePtr); - netExec = plugin.LoadNetwork(net, {}); -#else - bool isHetero = false; - if (device_name != "CPU") - { - isHetero = device_name == "FPGA"; - for (auto& layer : net) - { - if (layer->type == kOpenCVLayersType) - { - isHetero = true; -#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2019R3) - // Not sure about lower versions but in 2019R3 we do not need this - layer->affinity = "CPU"; - } - else - { - layer->affinity = device_name; -#endif - } - } - } - if (isHetero) - netExec = ie.LoadNetwork(net, "HETERO:" + device_name + ",CPU"); - else - netExec = ie.LoadNetwork(net, device_name); -#endif - } - catch (const std::exception& ex) - { - CV_Error(Error::StsError, format("Failed to initialize Inference Engine backend (device = %s): %s", device_name.c_str(), ex.what())); - } -} - -bool InfEngineBackendNet::isInitialized() -{ -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - return (bool)enginePtr; -#else - return isInit; -#endif -} - -void InfEngineBackendNet::reset() -{ - allBlobs.clear(); - infRequests.clear(); - isInit = false; -} - -void InfEngineBackendNet::addBlobs(const std::vector >& ptrs) -{ - auto wrappers = infEngineWrappers(ptrs); - for (const auto& wrapper : wrappers) - { - std::string name = wrapper->dataPtr->getName(); - name = name.empty() ? 
kDefaultInpLayerName : name; - allBlobs.insert({name, wrapper->blob}); - } -} - -void InfEngineBackendNet::InfEngineReqWrapper::makePromises(const std::vector >& outsWrappers) -{ - auto outs = infEngineWrappers(outsWrappers); - outProms.clear(); - outProms.resize(outs.size()); - outsNames.resize(outs.size()); - for (int i = 0; i < outs.size(); ++i) - { - outs[i]->futureMat = outProms[i].getArrayResult(); - outsNames[i] = outs[i]->dataPtr->getName(); - } -} - -void InfEngineBackendNet::forward(const std::vector >& outBlobsWrappers, - bool isAsync) -{ - CV_LOG_DEBUG(NULL, "InfEngineBackendNet::forward(" << (isAsync ? "async" : "sync") << ")"); - // Look for finished requests. - Ptr reqWrapper; - for (auto& wrapper : infRequests) - { - if (wrapper->isReady) - { - reqWrapper = wrapper; - break; - } - } - if (reqWrapper.empty()) - { - reqWrapper = Ptr(new InfEngineReqWrapper()); - try - { - reqWrapper->req = netExec.CreateInferRequest(); - } - catch (const std::exception& ex) - { - CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what())); - } - infRequests.push_back(reqWrapper); - - InferenceEngine::BlobMap inpBlobs, outBlobs; - for (const auto& it : cnn.getInputsInfo()) - { - const std::string& name = it.first; - auto blobIt = allBlobs.find(name); - CV_Assert(blobIt != allBlobs.end()); - inpBlobs[name] = isAsync ? cloneBlob(blobIt->second) : blobIt->second; - } - for (const auto& it : cnn.getOutputsInfo()) - { - const std::string& name = it.first; - auto blobIt = allBlobs.find(name); - CV_Assert(blobIt != allBlobs.end()); - outBlobs[name] = isAsync ? 
cloneBlob(blobIt->second) : blobIt->second; - } - reqWrapper->req.SetInput(inpBlobs); - reqWrapper->req.SetOutput(outBlobs); - - InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req; - infRequestPtr->SetUserData(reqWrapper.get(), 0); - - infRequestPtr->SetCompletionCallback( - [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode status) - { - CV_LOG_DEBUG(NULL, "DNN(IE): completionCallback(" << (int)status << ")"); - - InfEngineReqWrapper* wrapper; - request->GetUserData((void**)&wrapper, 0); - CV_Assert(wrapper && "Internal error"); - - size_t processedOutputs = 0; - try - { - for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs) - { - const std::string& name = wrapper->outsNames[processedOutputs]; - Mat m = infEngineBlobToMat(wrapper->req.GetBlob(name)); - - try - { - CV_Assert(status == InferenceEngine::StatusCode::OK); - wrapper->outProms[processedOutputs].setValue(m.clone()); - } - catch (...) - { - try { - wrapper->outProms[processedOutputs].setException(std::current_exception()); - } catch(...) { - CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation"); - } - } - } - } - catch (...) - { - std::exception_ptr e = std::current_exception(); - for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs) - { - try { - wrapper->outProms[processedOutputs].setException(e); - } catch(...) { - CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation"); - } - } - } - wrapper->isReady = true; - } - ); - } - if (isAsync) - { - // Copy actual data to infer request's input blobs. - for (const auto& it : cnn.getInputsInfo()) - { - const std::string& name = it.first; - auto blobIt = allBlobs.find(name); - Mat srcMat = infEngineBlobToMat(blobIt->second); - Mat dstMat = infEngineBlobToMat(reqWrapper->req.GetBlob(name)); - srcMat.copyTo(dstMat); - } - - // Set promises to output blobs wrappers. 
- reqWrapper->makePromises(outBlobsWrappers); - - reqWrapper->isReady = false; - reqWrapper->req.StartAsync(); - } - else - { - reqWrapper->req.Infer(); - } -} - -bool InfEngineBackendLayer::getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const -{ - InferenceEngine::ICNNNetwork::InputShapes inShapes = t_net.getInputShapes(); - InferenceEngine::ICNNNetwork::InputShapes::iterator itr; - bool equal_flag = true; - size_t i = 0; - for (itr = inShapes.begin(); itr != inShapes.end(); ++itr) - { - InferenceEngine::SizeVector currentInShape(inputs[i].begin(), inputs[i].end()); - if (itr->second != currentInShape) - { - itr->second = currentInShape; - equal_flag = false; - } - i++; - } - - if (!equal_flag) - { - InferenceEngine::CNNNetwork curr_t_net(t_net); - curr_t_net.reshape(inShapes); - } - std::vector dims = t_net.getOutputsInfo()[name]->getDims(); - outputs.push_back(MatShape(dims.begin(), dims.end())); - return false; -} - -bool InfEngineBackendLayer::supportBackend(int backendId) -{ - CV_LOG_DEBUG(NULL, "InfEngineBackendLayer::supportBackend(" << backendId << ")"); - return backendId == DNN_BACKEND_DEFAULT || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019); -} - -void InfEngineBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, - OutputArrayOfArrays internals) -{ - CV_Error(Error::StsInternal, "Choose Inference Engine as a preferable backend."); -} - -InferenceEngine::Blob::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob) -{ - auto halfs = InferenceEngine::make_shared_blob({ - InferenceEngine::Precision::FP16, blob->getTensorDesc().getDims(), - blob->getTensorDesc().getLayout() - }); - halfs->allocate(); - Mat floatsData(1, blob->size(), CV_32F, blob->buffer()); - Mat halfsData(1, blob->size(), CV_16SC1, halfs->buffer()); - convertFp16(floatsData, halfsData); - return halfs; -} - -void addConstantData(const std::string& name, 
InferenceEngine::Blob::Ptr data, - InferenceEngine::Builder::Layer& l) -{ -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) - l.getParameters()[name] = data; -#else - l.addConstantData(name, data); -#endif -} - -#endif // HAVE_DNN_IE_NN_BUILDER_2019 - #endif // HAVE_INF_ENGINE -bool haveInfEngine() -{ -#ifdef HAVE_INF_ENGINE - return true; -#else - return false; -#endif // HAVE_INF_ENGINE -} - -void forwardInfEngine(const std::vector >& outBlobsWrappers, - Ptr& node, bool isAsync) -{ - CV_Assert(haveInfEngine()); -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - CV_Assert(!node.empty()); - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net->forward(outBlobsWrappers, isAsync); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support"); -#endif // HAVE_INF_ENGINE -} CV__DNN_INLINE_NS_BEGIN void resetMyriadDevice() { #ifdef HAVE_INF_ENGINE + CV_LOG_INFO(NULL, "DNN: Unregistering both 'MYRIAD' and 'HETERO:MYRIAD,CPU' plugins"); + AutoLock lock(getInitializationMutex()); -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - getSharedPlugins().erase("MYRIAD"); -#else - // Unregister both "MYRIAD" and "HETERO:MYRIAD,CPU" plugins + InferenceEngine::Core& ie = getCore("MYRIAD"); try { @@ -1171,18 +175,16 @@ void resetMyriadDevice() ie.UnregisterPlugin("HETERO"); } catch (...) {} -#endif #endif // HAVE_INF_ENGINE } void releaseHDDLPlugin() { #ifdef HAVE_INF_ENGINE + CV_LOG_INFO(NULL, "DNN: Unregistering both 'HDDL' and 'HETERO:HDDL,CPU' plugins"); + AutoLock lock(getInitializationMutex()); -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R1) - getSharedPlugins().erase("HDDL"); -#else - // Unregister both "HDDL" and "HETERO:HDDL,CPU" plugins + InferenceEngine::Core& ie = getCore("HDDL"); try { @@ -1190,7 +192,6 @@ void releaseHDDLPlugin() ie.UnregisterPlugin("HETERO"); } catch (...) 
{} -#endif #endif // HAVE_INF_ENGINE } diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index ab2f161eaf..ed1323d7dd 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -48,37 +48,16 @@ #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -#if defined(HAVE_DNN_IE_NN_BUILDER_2019) || INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2020_4) -//#define INFERENCE_ENGINE_DEPRECATED // turn off deprecation warnings from IE -//there is no way to suppress warnings from IE only at this moment, so we are forced to suppress warnings globally -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -#ifdef _MSC_VER -#pragma warning(disable: 4996) // was declared deprecated -#endif -#endif - -#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1) -#pragma GCC visibility push(default) -#endif - #include -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 -#include -#endif - -#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1) -#pragma GCC visibility pop -#endif - #if defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic pop #endif #endif // HAVE_INF_ENGINE +#define CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 do { CV_Error(Error::StsNotImplemented, "This OpenCV version is built without Inference Engine NN Builder API support (legacy API is not supported anymore)"); } while (0) + namespace cv { namespace dnn { #ifdef HAVE_INF_ENGINE @@ -90,167 +69,6 @@ Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob); void infEngineBlobsToMats(const std::vector& blobs, std::vector& mats); -#ifdef HAVE_DNN_IE_NN_BUILDER_2019 - -class InfEngineBackendNet -{ -public: - InfEngineBackendNet(); - - InfEngineBackendNet(InferenceEngine::CNNNetwork& net); - - void addLayer(InferenceEngine::Builder::Layer& layer); - - void addOutput(const std::string& name); - - void connect(const std::vector >& inputs, - const std::vector >& outputs, - const 
std::string& layerName); - - bool isInitialized(); - - void init(Target targetId); - - void forward(const std::vector >& outBlobsWrappers, - bool isAsync); - - void initPlugin(InferenceEngine::CNNNetwork& net); - - void addBlobs(const std::vector >& ptrs); - - void reset(); - -private: - InferenceEngine::Builder::Network netBuilder; - - InferenceEngine::ExecutableNetwork netExec; - InferenceEngine::BlobMap allBlobs; - std::string device_name; -#if INF_ENGINE_VER_MAJOR_LE(2019010000) - InferenceEngine::InferenceEnginePluginPtr enginePtr; - InferenceEngine::InferencePlugin plugin; -#else - bool isInit = false; -#endif - - struct InfEngineReqWrapper - { - InfEngineReqWrapper() : isReady(true) {} - - void makePromises(const std::vector >& outs); - - InferenceEngine::InferRequest req; - std::vector outProms; - std::vector outsNames; - bool isReady; - }; - - std::vector > infRequests; - - InferenceEngine::CNNNetwork cnn; - bool hasNetOwner; - - std::map layers; - std::vector requestedOutputs; - - std::set > unconnectedPorts; -}; - -class InfEngineBackendNode : public BackendNode -{ -public: - InfEngineBackendNode(const InferenceEngine::Builder::Layer& layer); - - InfEngineBackendNode(Ptr& layer, std::vector& inputs, - std::vector& outputs, std::vector& internals); - - void connect(std::vector >& inputs, - std::vector >& outputs); - - // Inference Engine network object that allows to obtain the outputs of this layer. - InferenceEngine::Builder::Layer layer; - Ptr net; - // CPU fallback in case of unsupported Inference Engine layer. 
- Ptr cvLayer; -}; - -class InfEngineBackendWrapper : public BackendWrapper -{ -public: - InfEngineBackendWrapper(int targetId, const Mat& m); - - InfEngineBackendWrapper(Ptr wrapper); - - ~InfEngineBackendWrapper(); - - static Ptr create(Ptr wrapper); - - virtual void copyToHost() CV_OVERRIDE; - - virtual void setHostDirty() CV_OVERRIDE; - - InferenceEngine::DataPtr dataPtr; - InferenceEngine::Blob::Ptr blob; - AsyncArray futureMat; -}; - -InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY); - -InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector& shape, InferenceEngine::Layout layout); - -InferenceEngine::DataPtr infEngineDataNode(const Ptr& ptr); - -// Convert Inference Engine blob with FP32 precision to FP16 precision. -// Allocates memory for a new blob. -InferenceEngine::Blob::Ptr convertFp16(const InferenceEngine::Blob::Ptr& blob); - -void addConstantData(const std::string& name, InferenceEngine::Blob::Ptr data, InferenceEngine::Builder::Layer& l); - -// This is a fake class to run networks from Model Optimizer. Objects of that -// class simulate responses of layers are imported by OpenCV and supported by -// Inference Engine. The main difference is that they do not perform forward pass. 
-class InfEngineBackendLayer : public Layer -{ -public: - InfEngineBackendLayer(const InferenceEngine::CNNNetwork &t_net_) : t_net(t_net_) {}; - - virtual bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const CV_OVERRIDE; - - virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, - OutputArrayOfArrays internals) CV_OVERRIDE; - - virtual bool supportBackend(int backendId) CV_OVERRIDE; - -private: - InferenceEngine::CNNNetwork t_net; -}; - - -class InfEngineExtension : public InferenceEngine::IExtension -{ -public: -#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_2) - virtual void SetLogCallback(InferenceEngine::IErrorListener&) noexcept {} -#endif - virtual void Unload() noexcept {} - virtual void Release() noexcept {} - virtual void GetVersion(const InferenceEngine::Version*&) const noexcept {} - - virtual InferenceEngine::StatusCode getPrimitiveTypes(char**&, unsigned int&, - InferenceEngine::ResponseDesc*) noexcept - { - return InferenceEngine::StatusCode::OK; - } - - InferenceEngine::StatusCode getFactoryFor(InferenceEngine::ILayerImplFactory*& factory, - const InferenceEngine::CNNLayer* cnnLayer, - InferenceEngine::ResponseDesc* resp) noexcept; -}; - -#endif // HAVE_DNN_IE_NN_BUILDER_2019 CV__DNN_INLINE_NS_BEGIN @@ -273,14 +91,8 @@ static inline std::vector getShape(const Mat& mat) return result; } - #endif // HAVE_INF_ENGINE -bool haveInfEngine(); - -void forwardInfEngine(const std::vector >& outBlobsWrappers, - Ptr& node, bool isAsync); - }} // namespace dnn, namespace cv #endif // __OPENCV_DNN_OP_INF_ENGINE_HPP__ diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index efaedfaab1..763abf3b4d 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -11,6 +11,8 @@ Implementation of Tensorflow models parser #include "../precomp.hpp" +#include + #include 
#include #undef CV_LOG_STRIP_LEVEL @@ -513,6 +515,7 @@ class TFLayerHandler; class TFImporter { + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; public: TFImporter(Net& net, const char *model, const char *config = NULL); TFImporter(Net& net, const char *dataModel, size_t lenModel, @@ -3090,10 +3093,8 @@ void TFImporter::populateNet() { const tensorflow::NodeDef& layer = net.node(li); - const std::string name = layer.name(); - const std::string type = layer.op(); - const int ninputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + CV_LOG_DEBUG(NULL, "DNN/TF: processing node (" << li << "/" << layersSize << ") with " << layer.input_size() << " inputs: " + << cv::format("[%s]:(%s)", layer.op().c_str(), layer.name().c_str())); parseNode(layer); } diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 1e7f07a478..57a624d541 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -40,6 +40,9 @@ //M*/ #include "../precomp.hpp" + +#include + #include #include #include @@ -106,6 +109,8 @@ static inline bool endsWith(const String &str, const char *substr) struct TorchImporter { + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + typedef std::map > TensorsMap; Net net; diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index 9de1ac6821..747dc02ce6 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -337,16 +337,6 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget std::vector< tuple > targets; std::vector< Target > available; - { - available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019); - for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) - { - if ((*i == DNN_TARGET_MYRIAD || *i == 
DNN_TARGET_HDDL) && !withVPU) - continue; - targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, *i)); - } - } - { available = getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); for (std::vector< Target >::const_iterator i = available.begin(); i != available.end(); ++i) diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 2846f9ae76..0fe19db5e9 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -371,17 +371,17 @@ TEST_P(DNNTestOpenVINO, models) || modelName == "person-vehicle-bike-detection-2004" // 2021.4+: ncDeviceOpen:1013 Failed to find booted device after boot ) ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (targetId == DNN_TARGET_OPENCL && (false || modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported ) ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (targetId == DNN_TARGET_OPENCL_FP16 && (false || modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported ) ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif #if INF_ENGINE_VER_MAJOR_GE(2020020000) @@ -397,12 +397,7 @@ TEST_P(DNNTestOpenVINO, models) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - if (backendId == 
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD); diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index c181dfa5eb..6c41a8dbbb 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -218,8 +218,16 @@ TEST_P(Test_Int8_layers, ReLU6) TEST_P(Test_Int8_layers, Sigmoid) { testLayer("maxpooling_sigmoid", "ONNX", 0.0011, 0.0032); - testLayer("maxpooling_sigmoid_dynamic_axes", "ONNX", 0.0011, 0.0032); - testLayer("maxpooling_sigmoid_1d", "ONNX", 0.0011, 0.0037); +} + +TEST_P(Test_Int8_layers, Sigmoid_dynamic_axes) +{ + testLayer("maxpooling_sigmoid_dynamic_axes", "ONNX", 0.002, 0.0032); +} + +TEST_P(Test_Int8_layers, Sigmoid_1d) +{ + testLayer("maxpooling_sigmoid_1d", "ONNX", 0.002, 0.0037); } TEST_P(Test_Int8_layers, Mish) @@ -316,16 +324,48 @@ TEST_P(Test_Int8_layers, Identity) testLayer("expand_neg_batch", "ONNX", 0.00071, 0.0019); } -TEST_P(Test_Int8_layers, Slice) +TEST_P(Test_Int8_layers, Slice_split_tf) { testLayer("split", "TensorFlow", 0.0033, 0.0056); +} + +TEST_P(Test_Int8_layers, Slice_4d_tf) +{ testLayer("slice_4d", "TensorFlow", 0.003, 0.0073); +} + +TEST_P(Test_Int8_layers, Slice_strided_tf) +{ testLayer("strided_slice", "TensorFlow", 0.008, 0.0142); +} + +TEST_P(Test_Int8_layers, Slice_onnx) +{ testLayer("slice", "ONNX", 0.0046, 0.0077); - testLayer("slice_dynamic_axes", "ONNX", 0.0039, 0.0084); - testLayer("slice_opset_11_steps_2d", "ONNX", 0.0052, 0.0124); +} + +TEST_P(Test_Int8_layers, Slice_dynamic_axes_onnx) +{ + testLayer("slice_dynamic_axes", "ONNX", 0.0039, 0.02); +} + 
+TEST_P(Test_Int8_layers, Slice_steps_2d_onnx11) +{ + testLayer("slice_opset_11_steps_2d", "ONNX", 0.01, 0.0124); +} + +TEST_P(Test_Int8_layers, Slice_steps_3d_onnx11) +{ testLayer("slice_opset_11_steps_3d", "ONNX", 0.0068, 0.014); +} + +TEST_P(Test_Int8_layers, Slice_steps_4d_onnx11) +{ testLayer("slice_opset_11_steps_4d", "ONNX", 0.0041, 0.008); +} + +TEST_P(Test_Int8_layers, Slice_steps_5d_onnx11) +{ testLayer("slice_opset_11_steps_5d", "ONNX", 0.0085, 0.021); } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 8bbccdbc96..3f4a437637 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1265,12 +1265,7 @@ TEST_P(Layer_Test_Convolution_DLDT, Accuracy) if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net netDefault = readNet(_tf("layer_convolution.caffemodel"), _tf("layer_convolution.prototxt")); Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin")); @@ -1310,12 +1305,7 @@ TEST_P(Layer_Test_Convolution_DLDT, setInput_uint8) if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - 
FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); int blobSize[] = {2, 6, 75, 113}; Mat inputs[] = {Mat(4, &blobSize[0], CV_8U), Mat()}; @@ -1348,12 +1338,7 @@ TEST_P(Layer_Test_Convolution_DLDT, multithreading) if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); std::string xmlPath = _tf("layer_convolution.xml"); std::string binPath = _tf("layer_convolution.bin"); diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 39bb73a918..108131bd9d 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -117,12 +117,7 @@ void test_readNet_IE_do_not_call_setInput(Backend backendId) const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net net = readNet(model, proto); net.setPreferableBackend(backendId); @@ -462,12 +457,7 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_single) const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& proto = 
findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net netSync = readNet(model, proto); netSync.setPreferableBackend(backendId); @@ -523,12 +513,7 @@ TEST_P(Async, model_optimizer_pipeline_set_and_forward_all) const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net netSync = readNet(model, proto); netSync.setPreferableBackend(backendId); @@ -586,12 +571,7 @@ TEST_P(Async, create_layer_pipeline_set_and_forward_all) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && dtype == CV_8U) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net netSync; Net netAsync; @@ -697,12 +677,7 @@ TEST_P(Test_Model_Optimizer, forward_two_nets) const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& proto = 
findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net net0 = readNet(model, proto); net0.setPreferableTarget(targetId); @@ -741,12 +716,7 @@ TEST_P(Test_Model_Optimizer, readFromBuffer) const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& modelFile = findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net net1 = readNetFromModelOptimizer(modelFile, weightsFile); net1.setPreferableBackend(backendId); @@ -793,12 +763,7 @@ TEST_P(Test_Model_Optimizer, flexible_inputs) const std::string& model = findDataFile("dnn/layers/layer_convolution.bin"); const std::string& proto = findDataFile("dnn/layers/layer_convolution.xml"); - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - else - FAIL() << "Unknown backendId"; + ASSERT_EQ(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, backendId); Net net0 = readNet(model, proto); net0.setPreferableTarget(targetId); diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp index 
1c3877b7b2..0e912ede54 100644 --- a/modules/dnn/test/test_onnx_conformance.cpp +++ b/modules/dnn/test/test_onnx_conformance.cpp @@ -1181,10 +1181,10 @@ TEST_P(Test_ONNX_conformance, Layer_Test) } std::vector layerNames = net.getUnconnectedOutLayersNames(); - std::vector< std::vector > outputs_; + std::vector outputs; try { - net.forward(outputs_, layerNames); + net.forward(outputs, layerNames); } catch (...) { @@ -1192,8 +1192,7 @@ TEST_P(Test_ONNX_conformance, Layer_Test) applyTestTag(CV_TEST_TAG_DNN_ERROR_FORWARD); throw; } - ASSERT_GE(outputs_.size(), 1); - const std::vector& outputs = outputs_[0]; + ASSERT_GE(outputs.size(), 1); if (checkLayersFallbacks && checkFallbacks(net)) { diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index ec262ed301..bdd8f3b8b9 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -59,6 +59,12 @@ EOF_LABEL: bool filterApplied = false; +#if INF_ENGINE_VER_MAJOR_EQ(2021040000) || INF_ENGINE_VER_MAJOR_EQ(2022010000) +#define SKIP_SET_1 1 +#else +#define SKIP_SET_1 0 +#endif + // Update note: execute /testdata/dnn/onnx/generate_conformance_list.py BEGIN_SWITCH() CASE(test_abs) @@ -82,11 +88,11 @@ CASE(test_adam_multiple) CASE(test_add) // no filter CASE(test_add_bcast) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_add_uint8) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_and2d) @@ -106,131 +112,131 @@ CASE(test_and_bcast4v3d) CASE(test_and_bcast4v4d) // no filter CASE(test_argmax_default_axis_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_default_axis_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_default_axis_random) -#if 
INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_default_axis_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_negative_axis_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_negative_axis_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_negative_axis_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_negative_axis_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_no_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_no_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_no_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmax_no_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_default_axis_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_default_axis_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_default_axis_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif 
CASE(test_argmin_default_axis_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_negative_axis_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_negative_axis_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_negative_axis_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_negative_axis_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_no_keepdims_example) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_no_keepdims_example_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_no_keepdims_random) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_argmin_no_keepdims_random_select_last_index) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_asin) @@ -256,11 +262,11 @@ CASE(test_averagepool_2d_ceil) CASE(test_averagepool_2d_default) // no filter CASE(test_averagepool_2d_pads) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_averagepool_2d_pads_count_include_pad) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_CPU; // MYRIAD is ok SKIP_OPENCL; @@ -269,7 +275,7 @@ CASE(test_averagepool_2d_pads_count_include_pad) 
CASE(test_averagepool_2d_precomputed_pads) // no filter CASE(test_averagepool_2d_precomputed_pads_count_include_pad) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_averagepool_2d_precomputed_same_upper) @@ -277,7 +283,7 @@ CASE(test_averagepool_2d_precomputed_same_upper) CASE(test_averagepool_2d_precomputed_strides) // no filter CASE(test_averagepool_2d_same_lower) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_averagepool_2d_same_upper) @@ -287,11 +293,11 @@ CASE(test_averagepool_2d_strides) CASE(test_averagepool_3d_default) // no filter CASE(test_basic_conv_with_padding) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_basic_conv_without_padding) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_basic_convinteger) @@ -349,11 +355,11 @@ CASE(test_cast_FLOAT_to_DOUBLE) CASE(test_cast_FLOAT_to_FLOAT16) // no filter CASE(test_cast_FLOAT_to_STRING) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_cast_STRING_to_FLOAT) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_castlike_BFLOAT16_to_FLOAT) @@ -391,13 +397,13 @@ CASE(test_castlike_FLOAT_to_FLOAT16_expanded) CASE(test_castlike_FLOAT_to_STRING) // no filter CASE(test_castlike_FLOAT_to_STRING_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_castlike_STRING_to_FLOAT) // no filter CASE(test_castlike_STRING_to_FLOAT_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_ceil) @@ -441,7 +447,7 @@ CASE(test_compress_negative_axis) CASE(test_concat_1d_axis_0) // no filter CASE(test_concat_1d_axis_negative_1) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_concat_2d_axis_0) @@ -475,19 +481,19 @@ CASE(test_constantofshape_int_shape_zero) CASE(test_constantofshape_int_zeros) // no filter CASE(test_conv_with_autopad_same) 
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_conv_with_strides_and_asymmetric_padding) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_conv_with_strides_no_padding) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_conv_with_strides_padding) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_convinteger_with_padding) @@ -555,13 +561,13 @@ CASE(test_det_nd) CASE(test_div) // no filter CASE(test_div_bcast) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_div_example) // no filter CASE(test_div_uint8) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_dropout_default) @@ -573,7 +579,7 @@ CASE(test_dropout_default_mask_ratio) CASE(test_dropout_default_old) // no filter CASE(test_dropout_default_ratio) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_dropout_random_old) @@ -697,11 +703,11 @@ CASE(test_globalaveragepool) CASE(test_globalaveragepool_precomputed) // no filter CASE(test_globalmaxpool) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_globalmaxpool_precomputed) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_greater) @@ -811,12 +817,12 @@ CASE(test_log) CASE(test_log_example) // no filter CASE(test_logsoftmax_axis_0) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL; SKIP_OPENCL_FP16; #endif CASE(test_logsoftmax_axis_0_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL; SKIP_OPENCL_FP16; #endif @@ -829,7 +835,7 @@ CASE(test_logsoftmax_axis_2) CASE(test_logsoftmax_axis_2_expanded) // no filter CASE(test_logsoftmax_default_axis) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_logsoftmax_default_axis_expanded) @@ -839,12 +845,12 @@ CASE(test_logsoftmax_example_1) 
CASE(test_logsoftmax_example_1_expanded) // no filter CASE(test_logsoftmax_large_number) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL_FP16; SKIP_MYRIAD; #endif CASE(test_logsoftmax_large_number_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL_FP16; SKIP_MYRIAD; #endif @@ -907,71 +913,71 @@ CASE(test_max_uint64) CASE(test_max_uint8) // no filter CASE(test_maxpool_1d_default) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_ceil) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_default) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_dilations) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxpool_2d_pads) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_precomputed_pads) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_precomputed_same_upper) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_precomputed_strides) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_same_lower) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxpool_2d_same_upper) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_strides) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_MYRIAD; #endif CASE(test_maxpool_2d_uint8) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxpool_3d_default) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_NON_CPU; #endif CASE(test_maxpool_with_argmax_2d_precomputed_pads) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxpool_with_argmax_2d_precomputed_strides) -#if 
INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxunpool_export_with_output_shape) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_maxunpool_export_without_output_shape) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_mean_example) @@ -1041,13 +1047,13 @@ CASE(test_momentum_multiple) CASE(test_mul) // no filter CASE(test_mul_bcast) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_mul_example) // no filter CASE(test_mul_uint8) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_mvn) @@ -1717,12 +1723,12 @@ CASE(test_slice_negative_axes) CASE(test_slice_start_out_of_bounds) // no filter CASE(test_softmax_axis_0) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL; SKIP_OPENCL_FP16; #endif CASE(test_softmax_axis_0_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL; SKIP_OPENCL_FP16; #endif @@ -1735,7 +1741,7 @@ CASE(test_softmax_axis_2) CASE(test_softmax_axis_2_expanded) // no filter CASE(test_softmax_default_axis) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_softmax_default_axis_expanded) @@ -1745,12 +1751,12 @@ CASE(test_softmax_example) CASE(test_softmax_example_expanded) // no filter CASE(test_softmax_large_number) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL_FP16; SKIP_MYRIAD; #endif CASE(test_softmax_large_number_expanded) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP_OPENCL_FP16; SKIP_MYRIAD; #endif @@ -1771,26 +1777,11 @@ CASE(test_spacetodepth) CASE(test_spacetodepth_example) // no filter CASE(test_split_equal_parts_1d) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) - SKIP_CPU; - // MYRIAD is ok - SKIP_OPENCL; - SKIP_OPENCL_FP16; -#endif + // no filter CASE(test_split_equal_parts_2d) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) - SKIP_CPU; - // MYRIAD is ok - SKIP_OPENCL; - SKIP_OPENCL_FP16; -#endif + 
// no filter CASE(test_split_equal_parts_default_axis) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) - SKIP_CPU; - // MYRIAD is ok - SKIP_OPENCL; - SKIP_OPENCL_FP16; -#endif + // no filter CASE(test_split_variable_parts_1d) // no filter CASE(test_split_variable_parts_2d) @@ -1822,13 +1813,13 @@ CASE(test_strnormalizer_nostopwords_nochangecase) CASE(test_sub) // no filter CASE(test_sub_bcast) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_sub_example) // no filter CASE(test_sub_uint8) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_sum_example) @@ -1964,7 +1955,7 @@ CASE(test_unsqueeze_two_axes) CASE(test_unsqueeze_unsorted_axes) // no filter CASE(test_upsample_nearest) -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if SKIP_SET_1 SKIP; #endif CASE(test_where_example) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 9f13727e95..4918c72d10 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -885,9 +885,14 @@ TEST_P(Test_ONNX_layers, DynamicResize) testONNXModels("dynamic_resize_9", npy, 0, 0, false, true, 2); testONNXModels("dynamic_resize_10", npy, 0, 0, false, true, 2); testONNXModels("dynamic_resize_11", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_13", npy, 0, 0, false, true, 2); testONNXModels("dynamic_resize_scale_9", npy, 0, 0, false, true, 2); testONNXModels("dynamic_resize_scale_10", npy, 0, 0, false, true, 2); testONNXModels("dynamic_resize_scale_11", npy, 0, 0, false, true, 2); + testONNXModels("dynamic_resize_scale_13", npy, 0, 0, false, true, 2); + + testONNXModels("resize_size_opset11"); + testONNXModels("resize_size_opset13"); } TEST_P(Test_ONNX_layers, Resize_HumanSeg) diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index f3c6a70d1e..cd47421ea7 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -13,8 +13,14 @@ if(NOT TARGET ade) 
return() endif() -if(INF_ENGINE_TARGET) - ocv_option(OPENCV_GAPI_INF_ENGINE "Build GraphAPI module with Inference Engine support" ON) +if(TARGET ocv.3rdparty.openvino) + # TODO: remove OPENCV_GAPI_INF_ENGINE option + set(initial_value ON) + if(DEFINED OPENCV_GAPI_INF_ENGINE) + set(initial_value ${OPENCV_GAPI_INF_ENGINE}) + message(WARNING "OPENCV_GAPI_INF_ENGINE option is deprecated. Use OPENCV_GAPI_WITH_OPENVINO option instead.") + endif() + ocv_option(OPENCV_GAPI_WITH_OPENVINO "G-API: Enable OpenVINO Toolkit support" ${initial_value}) endif() set(the_description "OpenCV G-API Core Module") @@ -45,6 +51,7 @@ file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/fluid/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/gpu/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/infer/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/oak/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/ocl/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/own/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp" @@ -54,6 +61,7 @@ file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/gstreamer/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/onevpl/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/util/*.hpp" ) @@ -127,6 +135,11 @@ set(gapi_srcs src/backends/fluid/gfluidcore.cpp src/backends/fluid/gfluidcore_func.dispatch.cpp + # OAK Backend (optional) + src/backends/oak/goak.cpp + src/backends/oak/goakbackend.cpp + src/backends/oak/goak_media_adapter.cpp + # OCL Backend (currently built-in) src/backends/ocl/goclbackend.cpp src/backends/ocl/goclkernel.cpp @@ -185,6 +198,8 @@ set(gapi_srcs src/streaming/onevpl/engine/processing_engine_base.cpp src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp 
src/streaming/onevpl/engine/decode/decode_session.cpp + src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp + src/streaming/onevpl/engine/transcode/transcode_session.cpp src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp src/streaming/onevpl/data_provider_dispatcher.cpp @@ -219,12 +234,8 @@ ocv_create_module() ocv_target_link_libraries(${the_module} PRIVATE ade) -if(OPENCV_GAPI_INF_ENGINE) - ocv_target_link_libraries(${the_module} PRIVATE ${INF_ENGINE_TARGET}) -endif() - -if (HAVE_NGRAPH) - ocv_target_link_libraries(${the_module} PRIVATE ngraph::ngraph) +if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO) + ocv_target_link_libraries(${the_module} PRIVATE ocv.3rdparty.openvino) endif() if(HAVE_TBB) @@ -239,11 +250,8 @@ if(CV_TRACE AND HAVE_ITT) endif() set(__test_extra_deps "") -if(OPENCV_GAPI_INF_ENGINE) - list(APPEND __test_extra_deps ${INF_ENGINE_TARGET}) -endif() -if(HAVE_NGRAPH) - list(APPEND __test_extra_deps ngraph::ngraph) +if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO) + list(APPEND __test_extra_deps ocv.3rdparty.openvino) endif() ocv_add_accuracy_tests(${__test_extra_deps}) @@ -254,9 +262,6 @@ ocv_add_accuracy_tests(${__test_extra_deps}) if(TARGET opencv_test_gapi) target_include_directories(opencv_test_gapi PRIVATE "${CMAKE_CURRENT_LIST_DIR}/src") target_link_libraries(opencv_test_gapi PRIVATE ade) - if (HAVE_NGRAPH) - ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_NGRAPH) - endif() endif() if(HAVE_TBB AND TARGET opencv_test_gapi) @@ -272,6 +277,14 @@ if(HAVE_FREETYPE) ocv_target_include_directories(${the_module} PRIVATE ${FREETYPE_INCLUDE_DIRS}) endif() +if(HAVE_OAK) + ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_OAK) + if(TARGET opencv_test_gapi) + ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_OAK) + endif() + ocv_target_link_libraries(${the_module} PRIVATE depthai::core) +endif() + if(HAVE_PLAIDML) ocv_target_compile_definitions(${the_module} PRIVATE 
-DHAVE_PLAIDML) if(TARGET opencv_test_gapi) @@ -329,16 +342,21 @@ ocv_add_samples() # Required for sample with inference on host -if (TARGET example_gapi_onevpl_infer_single_roi) - if(OPENCV_GAPI_INF_ENGINE) - ocv_target_link_libraries(example_gapi_onevpl_infer_single_roi PRIVATE ${INF_ENGINE_TARGET}) - ocv_target_compile_definitions(example_gapi_onevpl_infer_single_roi PRIVATE -DHAVE_INF_ENGINE) +if(TARGET example_gapi_onevpl_infer_single_roi) + if(TARGET ocv.3rdparty.openvino AND OPENCV_GAPI_WITH_OPENVINO) + ocv_target_link_libraries(example_gapi_onevpl_infer_single_roi PRIVATE ocv.3rdparty.openvino) endif() if(HAVE_D3D11 AND HAVE_OPENCL) ocv_target_include_directories(example_gapi_onevpl_infer_single_roi SYSTEM PRIVATE ${OPENCL_INCLUDE_DIRS}) endif() endif() +if(TARGET example_gapi_pipeline_modeling_tool) + if(WIN32) + ocv_target_link_libraries(example_gapi_pipeline_modeling_tool winmm.lib) + endif() +endif() + # perf test dependencies postprocessing if(HAVE_GAPI_ONEVPL) # NB: TARGET opencv_perf_gapi doesn't exist before `ocv_add_perf_tests` @@ -350,3 +368,13 @@ if(HAVE_GAPI_ONEVPL) endif() endif() endif() + +if(HAVE_OAK) + # FIXME: consider better solution + if(TARGET example_gapi_oak_rgb_camera_encoding) + ocv_target_compile_definitions(example_gapi_oak_rgb_camera_encoding PRIVATE -DHAVE_OAK) + endif() + if(TARGET example_gapi_oak_small_hetero_pipeline) + ocv_target_compile_definitions(example_gapi_oak_small_hetero_pipeline PRIVATE -DHAVE_OAK) + endif() +endif() diff --git a/modules/gapi/cmake/init.cmake b/modules/gapi/cmake/init.cmake index 1c464328ca..dd4b0bccfa 100644 --- a/modules/gapi/cmake/init.cmake +++ b/modules/gapi/cmake/init.cmake @@ -1,7 +1,8 @@ OCV_OPTION(WITH_ADE "Enable ADE framework (required for Graph API module)" ON) -OCV_OPTION(WITH_FREETYPE "Enable FreeType framework" OFF) -OCV_OPTION(WITH_PLAIDML "Include PlaidML2 support" OFF) +OCV_OPTION(WITH_FREETYPE "Enable FreeType framework" OFF) +OCV_OPTION(WITH_PLAIDML "Include PlaidML2 support" 
OFF) +OCV_OPTION(WITH_OAK "Include OpenCV AI Kit support" OFF) if(NOT WITH_ADE) return() @@ -39,3 +40,10 @@ if(WITH_GAPI_ONEVPL) set(HAVE_GAPI_ONEVPL TRUE) endif() endif() + +if(WITH_OAK) + find_package(depthai QUIET) + if(depthai_FOUND) + set(HAVE_OAK TRUE) + endif() +endif() diff --git a/modules/gapi/include/opencv2/gapi/gframe.hpp b/modules/gapi/include/opencv2/gapi/gframe.hpp index 96913dc4cc..af5fc6bee5 100644 --- a/modules/gapi/include/opencv2/gapi/gframe.hpp +++ b/modules/gapi/include/opencv2/gapi/gframe.hpp @@ -86,6 +86,7 @@ enum class MediaFormat: int { BGR = 0, NV12, + GRAY, }; /** diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp index 72aea24288..de6f3bcdb9 100644 --- a/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp @@ -504,8 +504,8 @@ namespace imgproc { } else { - int outSz_w = static_cast(round(in.size.width * fx)); - int outSz_h = static_cast(round(in.size.height * fy)); + int outSz_w = saturate_cast(in.size.width * fx); + int outSz_h = saturate_cast(in.size.height * fy); GAPI_Assert(outSz_w > 0 && outSz_h > 0); return in.withSize(Size(outSz_w, outSz_h)); } diff --git a/modules/gapi/include/opencv2/gapi/oak/oak.hpp b/modules/gapi/include/opencv2/gapi/oak/oak.hpp new file mode 100644 index 0000000000..05fb09946f --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/oak/oak.hpp @@ -0,0 +1,131 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_OAK_HPP +#define OPENCV_GAPI_OAK_HPP + +#include // IStreamSource +#include // GKernelPackage +#include // GOptRunArgsP + +namespace cv { +namespace gapi { +namespace oak { + +// FIXME: copypasted from dai library +struct EncoderConfig { + /** + * Rate control mode specifies if constant or variable bitrate should be used (H264 / H265) + */ + enum class RateControlMode: int { CBR, VBR }; + + /** + * Encoding profile, H264, H265 or MJPEG + */ + enum class Profile: int { H264_BASELINE, H264_HIGH, H264_MAIN, H265_MAIN, MJPEG }; + /** + * Specifies prefered bitrate (kb) of compressed output bitstream + */ + std::int32_t bitrate = 8000; + /** + * Every x number of frames a keyframe will be inserted + */ + std::int32_t keyframeFrequency = 30; + /** + * Specifies maximum bitrate (kb) of compressed output bitstream + */ + std::int32_t maxBitrate = 8000; + /** + * Specifies number of B frames to be inserted + */ + std::int32_t numBFrames = 0; + /** + * This options specifies how many frames are available in this nodes pool (can help if + * receiver node is slow at consuming + */ + std::uint32_t numFramesPool = 4; + /** + * Encoding profile, H264, H265 or MJPEG + */ + Profile profile = Profile::H265_MAIN; + /** + * Value between 0-100% (approximates quality) + */ + std::int32_t quality = 80; + /** + * Lossless mode ([M]JPEG only) + */ + bool lossless = false; + /** + * Rate control mode specifies if constant or variable bitrate should be used (H264 / H265) + */ + RateControlMode rateCtrlMode = RateControlMode::CBR; + /** + * Input and compressed output frame width + */ + std::int32_t width = 1920; + /** + * Input and compressed output frame height + */ + std::int32_t height = 1080; + /** + * Frame rate + */ + float frameRate = 30.0f; +}; + +G_API_OP(GEncFrame, (GFrame, EncoderConfig)>, "org.opencv.oak.enc_frame") { + static GArrayDesc outMeta(const GFrameDesc&, const EncoderConfig&) { + return 
cv::empty_array_desc(); + } +}; + +G_API_OP(GSobelXY, , "org.opencv.oak.sobelxy") { + static GFrameDesc outMeta(const GFrameDesc& in, const cv::Mat&, const cv::Mat&) { + return in; + } +}; + +GAPI_EXPORTS GArray encode(const GFrame& in, const EncoderConfig&); + +GAPI_EXPORTS GFrame sobelXY(const GFrame& in, + const cv::Mat& hk, + const cv::Mat& vk); + +// OAK backend & kernels //////////////////////////////////////////////////////// +GAPI_EXPORTS cv::gapi::GBackend backend(); +GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); + +// Camera object /////////////////////////////////////////////////////////////// + +struct GAPI_EXPORTS ColorCameraParams {}; + +class GAPI_EXPORTS ColorCamera: public cv::gapi::wip::IStreamSource { + cv::MediaFrame m_dummy; + + virtual bool pull(cv::gapi::wip::Data &data) override; + virtual GMetaArg descr_of() const override; + +public: + ColorCamera(); +}; + +} // namespace oak +} // namespace gapi + +namespace detail { +template<> struct CompileArgTag { + static const char* tag() { return "gapi.oak.colorCameraParams"; } +}; + +template<> struct CompileArgTag { + static const char* tag() { return "gapi.oak.encoderConfig"; } +}; +} // namespace detail + +} // namespace cv + +#endif // OPENCV_GAPI_OAK_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp index 83afc99393..c566656cb6 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamerpipeline.hpp @@ -19,12 +19,12 @@ namespace gapi { namespace wip { namespace gst { -class GAPI_EXPORTS GStreamerPipeline +class GAPI_EXPORTS_W GStreamerPipeline { public: class Priv; - explicit GStreamerPipeline(const std::string& pipeline); + GAPI_WRAP explicit GStreamerPipeline(const std::string& pipeline); IStreamSource::Ptr getStreamingSource(const std::string& appsinkName, const 
GStreamerSource::OutputType outputType = GStreamerSource::OutputType::MAT); @@ -40,6 +40,18 @@ protected: using GStreamerPipeline = gst::GStreamerPipeline; +// NB: Function for using from python +// FIXME: a separate function is created due to absence of wrappers for `shared_ptr<> ` +// Ideally would be to wrap the `GStreamerPipeline::getStreamingSource()` method as is +GAPI_EXPORTS_W cv::Ptr +inline get_streaming_source(cv::Ptr& pipeline, + const std::string& appsinkName, + const GStreamerSource::OutputType outputType + = GStreamerSource::OutputType::MAT) +{ + return pipeline->getStreamingSource(appsinkName, outputType); +} + } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp index b81bad31b8..8b8a5ae312 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/gstreamer/gstreamersource.hpp @@ -32,13 +32,13 @@ namespace gst { * Pipeline can actually contain many sink elements, but it must have one and only one * appsink among them. * - * - data passed to appsink should be video-frame in NV12 format. + * - data passed to appsink should be video-frame in NV12 or GRAY8 format. * * 'outputType' is used to select type of output data to produce: 'cv::MediaFrame' or 'cv::Mat'. * To produce 'cv::MediaFrame'-s you need to pass 'GStreamerSource::OutputType::FRAME' and, * correspondingly, 'GStreamerSource::OutputType::MAT' to produce 'cv::Mat'-s. * Please note, that in the last case, output 'cv::Mat' will be of BGR format, internal conversion - * from NV12 GStreamer data will happen. + * from NV12 / GRAY8 GStreamer data will happen. * Default value for 'outputType' is 'GStreamerSource::OutputType::MAT'. 
* * @note Stream sources are passed to G-API via shared pointers, so please use gapi::make_src<> @@ -82,6 +82,14 @@ protected: using GStreamerSource = gst::GStreamerSource; +// NB: Overload for using from python +GAPI_EXPORTS_W cv::Ptr +inline make_gst_src(const std::string& pipeline, + const GStreamerSource::OutputType outputType = + GStreamerSource::OutputType::MAT) +{ + return make_src(pipeline, outputType); +} } // namespace wip } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp index bfd922496a..d93b4c561d 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp @@ -110,6 +110,62 @@ struct GAPI_EXPORTS CfgParam { static CfgParam create_implementation(uint32_t value); static CfgParam create_implementation(const char* value); + + static constexpr const char *vpp_frames_pool_size_name() { return "vpp_frames_pool_size"; } + static CfgParam create_vpp_frames_pool_size(size_t value); + + static constexpr const char *vpp_in_width_name() { return "vpp.In.Width"; } + static CfgParam create_vpp_in_width(uint16_t value); + + static constexpr const char *vpp_in_height_name() { return "vpp.In.Height"; } + static CfgParam create_vpp_in_height(uint16_t value); + + static constexpr const char *vpp_in_crop_x_name() { return "vpp.In.CropX"; } + static CfgParam create_vpp_in_crop_x(uint16_t value); + + static constexpr const char *vpp_in_crop_y_name() { return "vpp.In.CropY"; } + static CfgParam create_vpp_in_crop_y(uint16_t value); + + static constexpr const char *vpp_in_crop_w_name() { return "vpp.In.CropW"; } + static CfgParam create_vpp_in_crop_w(uint16_t value); + + static constexpr const char *vpp_in_crop_h_name() { return "vpp.In.CropH"; } + static CfgParam create_vpp_in_crop_h(uint16_t value); + + + static constexpr const char 
*vpp_out_fourcc_name() { return "vpp.Out.FourCC"; } + static CfgParam create_vpp_out_fourcc(uint32_t value); + + static constexpr const char *vpp_out_chroma_format_name() { return "vpp.Out.ChromaFormat"; } + static CfgParam create_vpp_out_chroma_format(uint16_t value); + + static constexpr const char *vpp_out_width_name() { return "vpp.Out.Width"; } + static CfgParam create_vpp_out_width(uint16_t value); + + static constexpr const char *vpp_out_height_name() { return "vpp.Out.Height"; } + static CfgParam create_vpp_out_height(uint16_t value); + + static constexpr const char *vpp_out_crop_x_name() { return "vpp.Out.CropX"; } + static CfgParam create_vpp_out_crop_x(uint16_t value); + + static constexpr const char *vpp_out_crop_y_name() { return "vpp.Out.CropY"; } + static CfgParam create_vpp_out_crop_y(uint16_t value); + + static constexpr const char *vpp_out_crop_w_name() { return "vpp.Out.CropW"; } + static CfgParam create_vpp_out_crop_w(uint16_t value); + + static constexpr const char *vpp_out_crop_h_name() { return "vpp.Out.CropH"; } + static CfgParam create_vpp_out_crop_h(uint16_t value); + + static constexpr const char *vpp_out_pic_struct_name() { return "vpp.Out.PicStruct"; } + static CfgParam create_vpp_out_pic_struct(uint16_t value); + + static constexpr const char *vpp_out_framerate_n_name() { return "vpp.Out.FrameRateExtN"; } + static CfgParam create_vpp_out_framerate_n(uint32_t value); + + static constexpr const char *vpp_out_framerate_d_name() { return "vpp.Out.FrameRateExtD"; } + static CfgParam create_vpp_out_framerate_d(uint32_t value); + /** * Create generic onevp::GSource configuration parameter. 
* diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index b1326712fc..6323582f5b 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -297,3 +297,5 @@ cv.gapi.wip.draw.Image = cv.gapi_wip_draw_Image cv.gapi.wip.draw.Poly = cv.gapi_wip_draw_Poly cv.gapi.streaming.queue_capacity = cv.gapi_streaming_queue_capacity + +cv.gapi.wip.GStreamerPipeline = cv.gapi_wip_gst_GStreamerPipeline diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index a71366250c..b4be0048d0 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -19,6 +19,7 @@ using detail_ExtractArgsCallback = cv::detail::ExtractArgsCallback; using detail_ExtractMetaCallback = cv::detail::ExtractMetaCallback; using vector_GNetParam = std::vector; using gapi_streaming_queue_capacity = cv::gapi::streaming::queue_capacity; +using GStreamerSource_OutputType = cv::gapi::wip::GStreamerSource::OutputType; // NB: Python wrapper generate T_U for T // This behavior is only observed for inputs @@ -230,7 +231,7 @@ PyObject* pyopencv_from(const cv::GArg& value) { HANDLE_CASE(BOOL, bool); HANDLE_CASE(INT, int); - HANDLE_CASE(INT64, int64_t); + HANDLE_CASE(INT64, int64_t); HANDLE_CASE(DOUBLE, double); HANDLE_CASE(FLOAT, float); HANDLE_CASE(STRING, std::string); diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index d7914c5157..d06447d791 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -34,6 +34,16 @@ try: return img + def convertNV12p2BGR(in_nv12): + shape = in_nv12.shape + y_height = shape[0] // 3 * 2 + uv_shape = (shape[0] // 3, shape[1]) + new_uv_shape = (uv_shape[0], uv_shape[1] // 2, 2) + return cv.cvtColorTwoPlane(in_nv12[:y_height, :], + 
in_nv12[ y_height:, :].reshape(new_uv_shape), + cv.COLOR_YUV2BGR_NV12) + + class test_gapi_streaming(NewOpenCVTests): def test_image_input(self): @@ -229,7 +239,6 @@ try: def test_gapi_streaming_meta(self): - ksize = 3 path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) # G-API @@ -350,6 +359,189 @@ try: cv.gapi.compile_args(cv.gapi.streaming.queue_capacity(1))) + def get_gst_source(self, gstpipeline): + # NB: Skip test in case gstreamer isn't available. + try: + return cv.gapi.wip.make_gst_src(gstpipeline) + except cv.error as e: + if str(e).find('Built without GStreamer support!') == -1: + raise e + else: + raise unittest.SkipTest(str(e)) + + + def test_gst_source(self): + if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER): + raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER") + + gstpipeline = """videotestsrc is-live=true pattern=colors num-buffers=10 ! + videorate ! videoscale ! video/x-raw,width=1920,height=1080, + framerate=30/1 ! 
appsink""" + + g_in = cv.GMat() + g_out = cv.gapi.copy(g_in) + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + ccomp = c.compileStreaming() + + source = self.get_gst_source(gstpipeline) + + ccomp.setSource(cv.gin(source)) + ccomp.start() + + has_frame, output = ccomp.pull() + while has_frame: + self.assertTrue(output.size != 0) + has_frame, output = ccomp.pull() + + + def open_VideoCapture_gstreamer(self, gstpipeline): + try: + cap = cv.VideoCapture(gstpipeline, cv.CAP_GSTREAMER) + except Exception as e: + raise unittest.SkipTest("Backend GSTREAMER can't open the video; " + + "cause: " + str(e)) + if not cap.isOpened(): + raise unittest.SkipTest("Backend GSTREAMER can't open the video") + return cap + + + def test_gst_source_accuracy(self): + if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER): + raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER") + + path = self.find_file('highgui/video/big_buck_bunny.avi', + [os.environ['OPENCV_TEST_DATA_PATH']]) + gstpipeline = """filesrc location=""" + path + """ ! decodebin ! videoconvert ! + videoscale ! video/x-raw,format=NV12 ! appsink""" + + # G-API pipeline + g_in = cv.GMat() + g_out = cv.gapi.copy(g_in) + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + ccomp = c.compileStreaming() + + # G-API Gst-source + source = self.get_gst_source(gstpipeline) + ccomp.setSource(cv.gin(source)) + ccomp.start() + + # OpenCV Gst-source + cap = self.open_VideoCapture_gstreamer(gstpipeline) + + # Assert + max_num_frames = 10 + for _ in range(max_num_frames): + has_expected, expected = cap.read() + has_actual, actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_expected: + break + + self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected), actual, cv.NORM_INF)) + + + def get_gst_pipeline(self, gstpipeline): + # NB: Skip test in case gstreamer isn't available. 
+ try: + return cv.gapi.wip.GStreamerPipeline(gstpipeline) + except cv.error as e: + if str(e).find('Built without GStreamer support!') == -1: + raise e + else: + raise unittest.SkipTest(str(e)) + except SystemError as e: + raise unittest.SkipTest(str(e) + ", casued by " + str(e.__cause__)) + + + def test_gst_multiple_sources(self): + if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER): + raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER") + + gstpipeline = """videotestsrc is-live=true pattern=colors num-buffers=10 ! + videorate ! videoscale ! + video/x-raw,width=1920,height=1080,framerate=30/1 ! + appsink name=sink1 + videotestsrc is-live=true pattern=colors num-buffers=10 ! + videorate ! videoscale ! + video/x-raw,width=1920,height=1080,framerate=30/1 ! + appsink name=sink2""" + + g_in1 = cv.GMat() + g_in2 = cv.GMat() + g_out = cv.gapi.add(g_in1, g_in2) + c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) + + ccomp = c.compileStreaming() + + pp = self.get_gst_pipeline(gstpipeline) + src1 = cv.gapi.wip.get_streaming_source(pp, "sink1") + src2 = cv.gapi.wip.get_streaming_source(pp, "sink2") + + ccomp.setSource(cv.gin(src1, src2)) + ccomp.start() + + has_frame, out = ccomp.pull() + while has_frame: + self.assertTrue(out.size != 0) + has_frame, out = ccomp.pull() + + + def test_gst_multiple_sources_accuracy(self): + if not cv.videoio_registry.hasBackend(cv.CAP_GSTREAMER): + raise unittest.SkipTest("Backend is not available/disabled: GSTREAMER") + + path = self.find_file('highgui/video/big_buck_bunny.avi', + [os.environ['OPENCV_TEST_DATA_PATH']]) + gstpipeline1 = """filesrc location=""" + path + """ ! decodebin ! videoconvert ! + videoscale ! video/x-raw,format=NV12 ! appsink""" + gstpipeline2 = """filesrc location=""" + path + """ ! decodebin ! + videoflip method=clockwise ! videoconvert ! videoscale ! + video/x-raw,format=NV12 ! 
appsink""" + gstpipeline_gapi = gstpipeline1 + ' name=sink1 ' + gstpipeline2 + ' name=sink2' + + # G-API pipeline + g_in1 = cv.GMat() + g_in2 = cv.GMat() + g_out1 = cv.gapi.copy(g_in1) + g_out2 = cv.gapi.copy(g_in2) + c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out1, g_out2)) + + ccomp = c.compileStreaming() + + # G-API Gst-source + pp = self.get_gst_pipeline(gstpipeline_gapi) + + src1 = cv.gapi.wip.get_streaming_source(pp, "sink1") + src2 = cv.gapi.wip.get_streaming_source(pp, "sink2") + ccomp.setSource(cv.gin(src1, src2)) + ccomp.start() + + # OpenCV Gst-source + cap1 = self.open_VideoCapture_gstreamer(gstpipeline1) + cap2 = self.open_VideoCapture_gstreamer(gstpipeline2) + + # Assert + max_num_frames = 10 + for _ in range(max_num_frames): + has_expected1, expected1 = cap1.read() + has_expected2, expected2 = cap2.read() + has_actual, (actual1, actual2) = ccomp.pull() + + self.assertEqual(has_expected1, has_expected2) + has_expected = has_expected1 and has_expected2 + self.assertEqual(has_expected, has_actual) + + if not has_expected: + break + + self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected1), actual1, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(convertNV12p2BGR(expected2), actual2, cv.NORM_INF)) + + except unittest.SkipTest as e: diff --git a/modules/gapi/perf/common/gapi_core_perf_tests.hpp b/modules/gapi/perf/common/gapi_core_perf_tests.hpp index 7a1568ad22..60294d2193 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests.hpp @@ -78,10 +78,6 @@ namespace opencv_test class KMeans2DPerfTest : public TestPerfParams> {}; class KMeans3DPerfTest : public TestPerfParams> {}; class TransposePerfTest : public TestPerfParams> {}; - class ResizePerfTest : public TestPerfParams> {}; - class BottleneckKernelsConstInputPerfTest : public TestPerfParams> {}; - class ResizeFxFyPerfTest : public TestPerfParams> {}; - class ResizeInSimpleGraphPerfTest : public TestPerfParams> {}; class 
ParseSSDBLPerfTest : public TestPerfParams>, public ParserSSDTest {}; class ParseSSDPerfTest : public TestPerfParams>, public ParserSSDTest {}; class ParseYoloPerfTest : public TestPerfParams>, public ParserYoloTest {}; diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index d5a8d95f46..72837da199 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -436,8 +436,8 @@ PERF_TEST_P_(DivPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need to workaround bug in OpenCV. - //It reinitializes divider matrix without zero values. + //This condition need to workaround issue in the OpenCV. + //It reinitializes divider matrix without zero values for CV_16S DST type. if (dtype == CV_16S && dtype != type) cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255)); @@ -482,6 +482,11 @@ PERF_TEST_P_(DivCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); + //This condition need as workaround the issue in the OpenCV. + //It reinitializes divider scalar without zero values for CV_16S DST type. 
+ if (dtype == CV_16S || (type == CV_16S && dtype == -1)) + cv::randu(sc, cv::Scalar::all(1), cv::Scalar::all(SHRT_MAX)); + // OpenCV code /////////////////////////////////////////////////////////// cv::divide(in_mat1, sc, out_mat_ocv, scale, dtype); @@ -2282,187 +2287,6 @@ PERF_TEST_P_(TransposePerfTest, TestPerformance) //------------------------------------------------------------------------------ -PERF_TEST_P_(ResizePerfTest, TestPerformance) -{ - compare_f cmpF; - MatType type = -1; - int interp = 1; - cv::Size sz; - cv::Size sz_out; - cv::GCompileArgs compile_args; - std::tie(cmpF, type, interp, sz, sz_out, compile_args) = GetParam(); - - in_mat1 = cv::Mat(sz, type); - cv::Scalar mean = cv::Scalar::all(127); - cv::Scalar stddev = cv::Scalar::all(40.f); - cv::randn(in_mat1, mean, stddev); - out_mat_gapi = cv::Mat(sz_out, type); - out_mat_ocv = cv::Mat(sz_out, type); - - // OpenCV code /////////////////////////////////////////////////////////// - cv::resize(in_mat1, out_mat_ocv, sz_out, 0.0, 0.0, interp); - - // G-API code ////////////////////////////////////////////////////////////// - cv::GMat in; - auto out = cv::gapi::resize(in, sz_out, 0.0, 0.0, interp); - cv::GComputation c(in, out); - - // Warm-up graph engine: - auto cc = c.compile(descr_of(gin(in_mat1)), - std::move(compile_args)); - cc(gin(in_mat1), gout(out_mat_gapi)); - - TEST_CYCLE() - { - cc(gin(in_mat1), gout(out_mat_gapi)); - } - - // Comparison //////////////////////////////////////////////////////////// - { - EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); - } - - SANITY_CHECK_NOTHING(); -} - -//------------------------------------------------------------------------------ - -PERF_TEST_P_(ResizeFxFyPerfTest, TestPerformance) -{ - compare_f cmpF; - MatType type = -1; - int interp = 1; - cv::Size sz; - double fx = 0.0; - double fy = 0.0; - cv::GCompileArgs compile_args; - std::tie(cmpF, type, interp, sz, fx, fy, compile_args) = GetParam(); - - in_mat1 = cv::Mat(sz, type); - cv::Scalar mean = 
cv::Scalar::all(127); - cv::Scalar stddev = cv::Scalar::all(40.f); - cv::randn(in_mat1, mean, stddev); - cv::Size sz_out = cv::Size(saturate_cast(sz.width *fx), saturate_cast(sz.height*fy)); - out_mat_gapi = cv::Mat(sz_out, type); - out_mat_ocv = cv::Mat(sz_out, type); - - // OpenCV code /////////////////////////////////////////////////////////// - cv::resize(in_mat1, out_mat_ocv, sz_out, fx, fy, interp); - - // G-API code ////////////////////////////////////////////////////////////// - cv::GMat in; - auto out = cv::gapi::resize(in, sz_out, fx, fy, interp); - cv::GComputation c(in, out); - - // Warm-up graph engine: - auto cc = c.compile(descr_of(gin(in_mat1)), - std::move(compile_args)); - cc(gin(in_mat1), gout(out_mat_gapi)); - - TEST_CYCLE() - { - cc(gin(in_mat1), gout(out_mat_gapi)); - } - // Comparison //////////////////////////////////////////////////////////// - { - EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); - } - - SANITY_CHECK_NOTHING(); -} - -//------------------------------------------------------------------------------ - -// This test cases were created to control performance result of test scenario mentioned here: -// https://stackoverflow.com/questions/60629331/opencv-gapi-performance-not-good-as-expected - -PERF_TEST_P_(BottleneckKernelsConstInputPerfTest, TestPerformance) -{ - compare_f cmpF; - std::string fileName = ""; - cv::GCompileArgs compile_args; - double fx = 0.5; - double fy = 0.5; - std::tie(cmpF, fileName, compile_args) = GetParam(); - - in_mat1 = cv::imread(findDataFile(fileName)); - - cv::Mat cvvga; - cv::Mat cvgray; - cv::Mat cvblurred; - - cv::resize(in_mat1, cvvga, cv::Size(), fx, fy); - cv::cvtColor(cvvga, cvgray, cv::COLOR_BGR2GRAY); - cv::blur(cvgray, cvblurred, cv::Size(3, 3)); - cv::Canny(cvblurred, out_mat_ocv, 32, 128, 3); - - cv::GMat in; - cv::GMat vga = cv::gapi::resize(in, cv::Size(), fx, fy, INTER_LINEAR); - cv::GMat gray = cv::gapi::BGR2Gray(vga); - cv::GMat blurred = cv::gapi::blur(gray, cv::Size(3, 3)); - cv::GMat 
out = cv::gapi::Canny(blurred, 32, 128, 3); - cv::GComputation ac(in, out); - - auto cc = ac.compile(descr_of(gin(in_mat1)), - std::move(compile_args)); - cc(gin(in_mat1), gout(out_mat_gapi)); - - TEST_CYCLE() - { - cc(gin(in_mat1), gout(out_mat_gapi)); - } - - // Comparison //////////////////////////////////////////////////////////// - { - EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); - } - - SANITY_CHECK_NOTHING(); -} - -//------------------------------------------------------------------------------ - -PERF_TEST_P_(ResizeInSimpleGraphPerfTest, TestPerformance) -{ - compare_f cmpF; - MatType type = -1; - cv::Size sz; - double fx = 0.5; - double fy = 0.5; - cv::GCompileArgs compile_args; - std::tie(cmpF, type, sz, fx, fy, compile_args) = GetParam(); - - initMatsRandU(type, sz, type, false); - - cv::Mat add_res_ocv; - - cv::add(in_mat1, in_mat2, add_res_ocv); - cv::resize(add_res_ocv, out_mat_ocv, cv::Size(), fx, fy); - - cv::GMat in1, in2; - cv::GMat add_res_gapi = cv::gapi::add(in1, in2); - cv::GMat out = cv::gapi::resize(add_res_gapi, cv::Size(), fx, fy, INTER_LINEAR); - cv::GComputation ac(GIn(in1, in2), GOut(out)); - - auto cc = ac.compile(descr_of(gin(in_mat1, in_mat2)), - std::move(compile_args)); - cc(gin(in_mat1, in_mat2), gout(out_mat_gapi)); - - TEST_CYCLE() - { - cc(gin(in_mat1, in_mat2), gout(out_mat_gapi)); - } - - // Comparison //////////////////////////////////////////////////////////// - { - EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); - } - - SANITY_CHECK_NOTHING(); -} - -//------------------------------------------------------------------------------ - PERF_TEST_P_(ParseSSDBLPerfTest, TestPerformance) { cv::Size sz; diff --git a/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp b/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp index 16e5f13729..c1afbfea6d 100644 --- a/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp +++ b/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp @@ -99,6 +99,10 @@ class YUV2BGRPerfTest : public 
TestPerfParams> {}; class BayerGR2RGBPerfTest : public TestPerfParams> {}; class RGB2YUV422PerfTest : public TestPerfParams> {}; +class ResizePerfTest : public TestPerfParams> {}; +class ResizeFxFyPerfTest : public TestPerfParams> {}; +class ResizeInSimpleGraphPerfTest : public TestPerfParams> {}; +class BottleneckKernelsConstInputPerfTest : public TestPerfParams> {}; } // opencv_test #endif //OPENCV_GAPI_IMGPROC_PERF_TESTS_HPP diff --git a/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp index 57c8130338..475daa84c1 100644 --- a/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp @@ -1761,5 +1761,187 @@ PERF_TEST_P_(RGB2YUV422PerfTest, TestPerformance) //------------------------------------------------------------------------------ +PERF_TEST_P_(ResizePerfTest, TestPerformance) +{ + compare_f cmpF; + MatType type = -1; + int interp = 1; + cv::Size sz; + cv::Size sz_out; + cv::GCompileArgs compile_args; + std::tie(cmpF, type, interp, sz, sz_out, compile_args) = GetParam(); + + in_mat1 = cv::Mat(sz, type); + cv::Scalar mean = cv::Scalar::all(127); + cv::Scalar stddev = cv::Scalar::all(40.f); + cv::randn(in_mat1, mean, stddev); + out_mat_gapi = cv::Mat(sz_out, type); + out_mat_ocv = cv::Mat(sz_out, type); + + // OpenCV code /////////////////////////////////////////////////////////// + cv::resize(in_mat1, out_mat_ocv, sz_out, 0.0, 0.0, interp); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::resize(in, sz_out, 0.0, 0.0, interp); + cv::GComputation c(in, out); + + // Warm-up graph engine: + auto cc = c.compile(descr_of(gin(in_mat1)), + std::move(compile_args)); + cc(gin(in_mat1), gout(out_mat_gapi)); + + TEST_CYCLE() + { + cc(gin(in_mat1), gout(out_mat_gapi)); + } + + // Comparison //////////////////////////////////////////////////////////// + { + 
EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + } + + SANITY_CHECK_NOTHING(); +} + +//------------------------------------------------------------------------------ + +PERF_TEST_P_(ResizeFxFyPerfTest, TestPerformance) +{ + compare_f cmpF; + MatType type = -1; + int interp = 1; + cv::Size sz; + double fx = 1.0; + double fy = 1.0; + cv::GCompileArgs compile_args; + std::tie(cmpF, type, interp, sz, fx, fy, compile_args) = GetParam(); + + in_mat1 = cv::Mat(sz, type); + cv::Scalar mean = cv::Scalar::all(127); + cv::Scalar stddev = cv::Scalar::all(40.f); + cv::randn(in_mat1, mean, stddev); + cv::Size sz_out = cv:: Size(saturate_cast(sz.width*fx), saturate_cast(sz.height*fy)); + out_mat_gapi = cv::Mat(sz_out, type); + out_mat_ocv = cv::Mat(sz_out, type); + + // OpenCV code /////////////////////////////////////////////////////////// + cv::resize(in_mat1, out_mat_ocv, sz_out, fx, fy, interp); + + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + auto out = cv::gapi::resize(in, sz_out, fx, fy, interp); + cv::GComputation c(in, out); + + // Warm-up graph engine: + auto cc = c.compile(descr_of(gin(in_mat1)), + std::move(compile_args)); + cc(gin(in_mat1), gout(out_mat_gapi)); + + TEST_CYCLE() + { + cc(gin(in_mat1), gout(out_mat_gapi)); + } + + // Comparison //////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + } + + SANITY_CHECK_NOTHING(); +} + +//------------------------------------------------------------------------------ + +PERF_TEST_P_(ResizeInSimpleGraphPerfTest, TestPerformance) +{ + compare_f cmpF; + MatType type = -1; + cv::Size sz; + double fx = 0.5; + double fy = 0.5; + cv::GCompileArgs compile_args; + std::tie(cmpF, type, sz, fx, fy, compile_args) = GetParam(); + + initMatsRandU(type, sz, type, false); + + cv::Mat add_res_ocv; + + cv::add(in_mat1, in_mat2, add_res_ocv); + cv::resize(add_res_ocv, out_mat_ocv, cv::Size(), fx, fy); + + cv::GMat in1, in2; + cv::GMat 
add_res_gapi = cv::gapi::add(in1, in2); + cv::GMat out = cv::gapi::resize(add_res_gapi, cv::Size(), fx, fy, INTER_LINEAR); + cv::GComputation ac(GIn(in1, in2), GOut(out)); + + auto cc = ac.compile(descr_of(gin(in_mat1, in_mat2)), + std::move(compile_args)); + cc(gin(in_mat1, in_mat2), gout(out_mat_gapi)); + + TEST_CYCLE() + { + cc(gin(in_mat1, in_mat2), gout(out_mat_gapi)); + } + + // Comparison //////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + } + + SANITY_CHECK_NOTHING(); +} + +//------------------------------------------------------------------------------ + +// This test cases were created to control performance result of test scenario mentioned here: +// https://stackoverflow.com/questions/60629331/opencv-gapi-performance-not-good-as-expected + +PERF_TEST_P_(BottleneckKernelsConstInputPerfTest, TestPerformance) +{ + compare_f cmpF; + std::string fileName = ""; + cv::GCompileArgs compile_args; + double fx = 0.5; + double fy = 0.5; + std::tie(cmpF, fileName, compile_args) = GetParam(); + + in_mat1 = cv::imread(findDataFile(fileName)); + + cv::Mat cvvga; + cv::Mat cvgray; + cv::Mat cvblurred; + + cv::resize(in_mat1, cvvga, cv::Size(), fx, fy); + cv::cvtColor(cvvga, cvgray, cv::COLOR_BGR2GRAY); + cv::blur(cvgray, cvblurred, cv::Size(3, 3)); + cv::Canny(cvblurred, out_mat_ocv, 32, 128, 3); + + cv::GMat in; + cv::GMat vga = cv::gapi::resize(in, cv::Size(), fx, fy, INTER_LINEAR); + cv::GMat gray = cv::gapi::BGR2Gray(vga); + cv::GMat blurred = cv::gapi::blur(gray, cv::Size(3, 3)); + cv::GMat out = cv::gapi::Canny(blurred, 32, 128, 3); + cv::GComputation ac(in, out); + + auto cc = ac.compile(descr_of(gin(in_mat1)), + std::move(compile_args)); + cc(gin(in_mat1), gout(out_mat_gapi)); + + TEST_CYCLE() + { + cc(gin(in_mat1), gout(out_mat_gapi)); + } + + // Comparison //////////////////////////////////////////////////////////// + { + EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv)); + } + + SANITY_CHECK_NOTHING(); 
+} + +//------------------------------------------------------------------------------ + } #endif //OPENCV_GAPI_IMGPROC_PERF_TESTS_INL_HPP diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp index 5323ea8f08..2f91e07e52 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp @@ -369,38 +369,6 @@ INSTANTIATE_TEST_CASE_P(TransposePerfTestCPU, TransposePerfTest, CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3), Values(cv::compile_args(CORE_CPU)))); -INSTANTIATE_TEST_CASE_P(ResizePerfTestCPU, ResizePerfTest, - Combine(Values(AbsExact().to_compare_f()), - Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1), - Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values( cv::Size(64, 64), - cv::Size(32, 32)), - Values(cv::compile_args(CORE_CPU)))); - -INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestCPU, BottleneckKernelsConstInputPerfTest, - Combine(Values(AbsExact().to_compare_f()), - Values( "cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png", - "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"), - Values(cv::compile_args(CORE_CPU)))); - -INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestCPU, ResizeInSimpleGraphPerfTest, - Combine(Values(AbsExact().to_compare_f()), - Values(CV_8UC3), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values(0.5), - Values(0.5), - Values(cv::compile_args(CORE_CPU)))); - -INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestCPU, ResizeFxFyPerfTest, - Combine(Values(AbsExact().to_compare_f()), - Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1), - Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values(0.5, 0.1), - Values(0.5, 0.1), - Values(cv::compile_args(CORE_CPU)))); - INSTANTIATE_TEST_CASE_P(ParseSSDBLPerfTestCPU, ParseSSDBLPerfTest, Combine(Values(sz720p, sz1080p), Values(0.3f, 
0.7f), diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index e25029b835..796d05101e 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -49,8 +49,8 @@ INSTANTIATE_TEST_CASE_P(SubPerfTestFluid, SubPerfTest, INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(SubRCPerfTestFluid, SubRCPerfTest, @@ -78,8 +78,8 @@ INSTANTIATE_TEST_CASE_P(MulDoublePerfTestFluid, MulDoublePerfTest, INSTANTIATE_TEST_CASE_P(MulCPerfTestFluid, MulCPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(DivPerfTestFluid, DivPerfTest, @@ -93,8 +93,8 @@ INSTANTIATE_TEST_CASE_P(DivPerfTestFluid, DivPerfTest, INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(1.0), Values(cv::compile_args(CORE_FLUID)))); @@ -328,36 +328,4 @@ INSTANTIATE_TEST_CASE_P(ConvertToPerfTestFluid, ConvertToPerfTest, Values(0.0), Values(cv::compile_args(CORE_FLUID)))); -INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, 
ResizePerfTest, - Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), - Values(cv::INTER_LINEAR), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values(cv::Size(64, 64), - cv::Size(30, 30)), - Values(cv::compile_args(CORE_FLUID)))); - -#define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels() -INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstInputPerfTest, - Combine(Values(AbsSimilarPoints(0, 1).to_compare_f()), - Values("cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png", - "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"), - Values(cv::compile_args(CORE_FLUID, IMGPROC_FLUID)))); - -INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest, - Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values(0.5), - Values(0.5), - Values(cv::compile_args(CORE_FLUID, IMGPROC_FLUID)))); - -INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest, - Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), - Values(cv::INTER_LINEAR), - Values(szSmall128, szVGA, sz720p, sz1080p), - Values(0.5, 0.1), - Values(0.5, 0.1), - Values(cv::compile_args(CORE_FLUID)))); } // opencv_test diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp index d4c37c10af..dc4c65bf74 100644 --- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp +++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp @@ -403,4 +403,36 @@ INSTANTIATE_TEST_CASE_P(RGB2YUV422PerfTestCPU, RGB2YUV422PerfTest, Combine(Values(ToleranceColor(1e-3).to_compare_f()), Values(szVGA, sz720p, sz1080p), Values(cv::compile_args(IMGPROC_CPU)))); + +INSTANTIATE_TEST_CASE_P(ResizePerfTestCPU, ResizePerfTest, + Combine(Values(AbsExact().to_compare_f()), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1), + 
Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values( cv::Size(64, 64), + cv::Size(32, 32)), + Values(cv::compile_args(IMGPROC_CPU)))); + +INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestCPU, BottleneckKernelsConstInputPerfTest, + Combine(Values(AbsExact().to_compare_f()), + Values( "cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png", + "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"), + Values(cv::compile_args(IMGPROC_CPU)))); + +INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestCPU, ResizeInSimpleGraphPerfTest, + Combine(Values(AbsExact().to_compare_f()), + Values(CV_8UC3), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(0.5), + Values(0.5), + Values(cv::compile_args(IMGPROC_CPU)))); + +INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestCPU, ResizeFxFyPerfTest, + Combine(Values(AbsExact().to_compare_f()), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1), + Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(0.5, 0.1), + Values(0.5, 0.1), + Values(cv::compile_args(IMGPROC_CPU)))); } // opencv_test diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp index 1ccd763099..b5e72ae4ce 100644 --- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp @@ -9,6 +9,7 @@ #include "../common/gapi_imgproc_perf_tests.hpp" #define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels() +#define CORE_FLUID cv::gapi::core::fluid::kernels() namespace opencv_test { @@ -198,4 +199,36 @@ INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest, Values(szVGA, sz720p, sz1080p), Values(cv::compile_args(IMGPROC_FLUID)))); +INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, ResizePerfTest, + Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), + Values(CV_8UC3), + 
Values(cv::INTER_LINEAR), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(cv::Size(64, 64), + cv::Size(30, 30)), + Values(cv::compile_args(IMGPROC_FLUID)))); + +#define IMGPROC_FLUID cv::gapi::imgproc::fluid::kernels() +INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstInputPerfTest, + Combine(Values(AbsSimilarPoints(0, 1).to_compare_f()), + Values("cv/optflow/frames/1080p_00.png", "cv/optflow/frames/720p_00.png", + "cv/optflow/frames/VGA_00.png", "cv/dnn_face/recognition/Aaron_Tippin_0001.jpg"), + Values(cv::compile_args(IMGPROC_FLUID)))); + +INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest, + Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), + Values(CV_8UC3), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(0.5), + Values(0.5), + Values(cv::compile_args(cv::gapi::combine(IMGPROC_FLUID, CORE_FLUID))))); + +INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest, + Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), + Values(CV_8UC3), + Values(cv::INTER_LINEAR), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(0.5, 0.25, 2), + Values(0.5, 0.25, 2), + Values(cv::compile_args(IMGPROC_FLUID)))); } diff --git a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp index 6aaec4d79a..8aaa304e58 100644 --- a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp +++ b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp @@ -79,7 +79,7 @@ INSTANTIATE_TEST_CASE_P(DivPerfTestGPU, DivPerfTest, Values(cv::compile_args(CORE_GPU)))); INSTANTIATE_TEST_CASE_P(DivCPerfTestGPU, DivCPerfTest, - Combine(Values(AbsExact().to_compare_f()), + Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values( szSmall128, szVGA, sz720p, sz1080p ), Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ), Values( -1, CV_8U, CV_16U, CV_32F ), @@ -320,25 +320,7 @@ INSTANTIATE_TEST_CASE_P(TransposePerfTestGPU, 
TransposePerfTest, Combine(Values(AbsExact().to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1, - CV_8UC2, CV_16UC2, CV_16SC2, CV_32FC2, - CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3), - Values(cv::compile_args(CORE_GPU)))); - -INSTANTIATE_TEST_CASE_P(ResizePerfTestGPU, ResizePerfTest, - Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()), - Values(CV_8UC1, CV_16UC1, CV_16SC1), - Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), - Values( szSmall128, szVGA, sz720p, sz1080p ), - Values(cv::Size(64,64), - cv::Size(30,30)), - Values(cv::compile_args(CORE_GPU)))); - -INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestGPU, ResizeFxFyPerfTest, - Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()), - Values(CV_8UC1, CV_16UC1, CV_16SC1), - Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), - Values( szSmall128, szVGA, sz720p, sz1080p ), - Values(0.5, 0.1), - Values(0.5, 0.1), + CV_8UC2, CV_16UC2, CV_16SC2, CV_32FC2, + CV_8UC3, CV_16UC3, CV_16SC3, CV_32FC3), Values(cv::compile_args(CORE_GPU)))); } // opencv_test diff --git a/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp index 1f4f3883d1..faacf4f254 100644 --- a/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp +++ b/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp @@ -204,4 +204,21 @@ INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestGPU, YUV2BGRPerfTest, Values(szVGA, sz720p, sz1080p), Values(cv::compile_args(IMGPROC_GPU)))); +INSTANTIATE_TEST_CASE_P(ResizePerfTestGPU, ResizePerfTest, + Combine(Values(AbsSimilarPoints(2, 0.05).to_compare_f()), + Values(CV_8UC1, CV_16UC1, CV_16SC1), + Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), + Values( szSmall128, szVGA, sz720p, sz1080p ), + Values(cv::Size(64,64), + cv::Size(30,30)), + Values(cv::compile_args(IMGPROC_GPU)))); + +INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestGPU, ResizeFxFyPerfTest, + 
Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), + Values(CV_8UC1, CV_16UC1, CV_16SC1), + Values(cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_AREA), + Values(szSmall128, szVGA, sz720p, sz1080p), + Values(0.5, 0.1), + Values(0.5, 0.1), + Values(cv::compile_args(IMGPROC_GPU)))); } diff --git a/modules/gapi/samples/data/config_template.yml b/modules/gapi/samples/data/config_template.yml new file mode 100644 index 0000000000..75012d959d --- /dev/null +++ b/modules/gapi/samples/data/config_template.yml @@ -0,0 +1,192 @@ +%YAML:1.0 + +# Application running time in milliseconds: integer. +work_time: 2000 + +Pipelines: + PL1: + source: + name: 'Src' + latency: 33.0 + output: + dims: [1, 3, 1280, 720] + precision: 'U8' + + nodes: + - name: 'PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + edges: + - from: 'Src' + to: 'PP' + - from: 'PP' + to: 'Infer' + + # Path to the dump file (*.dot)' + dump: 'pl1.dot' + + PL2: + source: + name: 'Src' + latency: 50.0 + output: + dims: [1, 3, 1280, 720] + precision: 'U8' + + nodes: + - name: 'M1_PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'M1_Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + - name: 'M2_PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'M2_Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + - name: 'M3_PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 
'M3_Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + - name: 'M4_PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'M4_Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + - name: 'M5_PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'M5_Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + edges: + - from: 'Src' + to: 'M1_PP' + - from: 'M1_PP' + to: 'M1_Infer' + - from: 'M1_Infer' + to: 'M2_PP' + - from: 'M2_PP' + to: 'M2_Infer' + - from: 'M2_Infer' + to: 'M3_PP' + - from: 'M3_PP' + to: 'M3_Infer' + - from: 'M3_Infer' + to: 'M4_PP' + - from: 'M4_PP' + to: 'M4_Infer' + - from: 'M4_Infer' + to: 'M5_PP' + - from: 'M5_PP' + to: 'M5_Infer' + + dump: 'pl2.dot' + + PL3: + source: + name: 'Src' + latency: 33.0 + output: + dims: [1, 3, 1280, 720] + precision: 'U8' + + nodes: + - name: 'PP' + type: 'Dummy' + time: 0.2 + output: + dims: [1, 3, 300, 300] + precision: 'U8' + + - name: 'Infer' + type: 'Infer' + xml: 'face-detection-retail-0004.xml' + bin: 'face-detection-retail-0004.bin' + device: 'CPU' + input_layers: + - 'data' + output_layers: + - 'detection_out' + + edges: + - from: 'Src' + to: 'PP' + - from: 'PP' + to: 'Infer' + + dump: 'pl3.dot' diff --git a/modules/gapi/samples/oak_rgb_camera_encoding.cpp b/modules/gapi/samples/oak_rgb_camera_encoding.cpp new file mode 100644 index 0000000000..ac6b5cc5f0 --- /dev/null +++ b/modules/gapi/samples/oak_rgb_camera_encoding.cpp @@ -0,0 +1,70 @@ +#include + +#include +#include +#include + +#include +#include // BGR accessor + +#include // 
CommandLineParser + +const std::string keys = + "{ h help | | Print this help message }" + "{ output | output.h265 | Path to the output .h265 video file }"; + +#ifdef HAVE_OAK + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const std::string output_name = cmd.get("output"); + + cv::gapi::oak::EncoderConfig cfg; + cfg.profile = cv::gapi::oak::EncoderConfig::Profile::H265_MAIN; + + cv::GFrame in; + cv::GArray encoded = cv::gapi::oak::encode(in, cfg); + + auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, cv::gapi::oak::kernels()); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(encoded)).compileStreaming(std::move(args)); + + // Graph execution ///////////////////////////////////////////////////////// + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + std::vector out_h265_data; + + std::ofstream out_h265_file; + out_h265_file.open(output_name, std::ofstream::out | std::ofstream::binary | std::ofstream::trunc); + + // Pull 300 frames from the camera + uint32_t frames = 300; + uint32_t pulled = 0; + + while (pipeline.pull(cv::gout(out_h265_data))) { + if (out_h265_file.is_open()) { + out_h265_file.write(reinterpret_cast(out_h265_data.data()), + out_h265_data.size()); + } + if (pulled++ == frames) { + pipeline.stop(); + break; + } + } + + std::cout << "Pipeline finished: " << output_name << " file has been written." 
<< std::endl; +} +#else // HAVE_OAK + +int main() { + GAPI_Assert(false && "Built without OAK support"); + return -1; +} + +#endif // HAVE_OAK diff --git a/modules/gapi/samples/oak_small_hetero_pipeline.cpp b/modules/gapi/samples/oak_small_hetero_pipeline.cpp new file mode 100644 index 0000000000..dadb9d0f3b --- /dev/null +++ b/modules/gapi/samples/oak_small_hetero_pipeline.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include + +#include +#include // BGR accessor + +#include // CommandLineParser + +const std::string keys = + "{ h help | | Print this help message }" + "{ output | output.png | Path to the output file }"; + +#ifdef HAVE_OAK + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const std::string output_name = cmd.get("output"); + + std::vector h = {1, 0, -1, + 2, 0, -2, + 1, 0, -1}; + std::vector v = { 1, 2, 1, + 0, 0, 0, + -1, -2, -1}; + cv::Mat hk(3, 3, CV_32SC1, h.data()); + cv::Mat vk(3, 3, CV_32SC1, v.data()); + + // Heterogeneous pipeline: + // OAK camera -> Sobel -> streaming accessor (CPU) + cv::GFrame in; + cv::GFrame sobel = cv::gapi::oak::sobelXY(in, hk, vk); + // Default camera and then sobel work only with nv12 format + cv::GMat out = cv::gapi::streaming::Y(sobel); + + auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, + cv::gapi::oak::kernels()); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args)); + + // Graph execution ///////////////////////////////////////////////////////// + cv::Mat out_mat(1920, 1080, CV_8UC1); + + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + // pull 1 frame + pipeline.pull(cv::gout(out_mat)); + + cv::imwrite(output_name, out_mat); + + std::cout << "Pipeline finished: " << output_name << " file has been written." 
<< std::endl; +} + +#else // HAVE_OAK + +int main() { + GAPI_Assert(false && "Built without OAK support"); + return -1; +} + +#endif // HAVE_OAK diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index 06950bcabe..80327e2d59 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -45,10 +45,15 @@ const std::string keys = "{ faced | AUTO | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0 }" - "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}"; - + "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" + "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" + "{ source_preproc_enable | 0 | Turn on OneVPL source frame preprocessing using network input description instead of IE plugin preprocessing}"; namespace { +bool is_gpu(const std::string &device_name) { + return device_name.find("GPU") != std::string::npos; +} + std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); @@ -123,8 +128,9 @@ using GRect = cv::GOpaque; using GSize = cv::GOpaque; using GPrims = cv::GArray; -G_API_OP(LocateROI, , "sample.custom.locate-roi") { - static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) { +G_API_OP(LocateROI, )>, "sample.custom.locate-roi") { + static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &, + std::reference_wrapper) { return cv::empty_gopaque_desc(); } }; @@ -145,18 +151,30 @@ 
GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { // but only crops the input image to square (this is // the most convenient aspect ratio for detectors to use) - static void run(const cv::Size& in_size, cv::Rect &out_rect) { + static void run(const cv::Size& in_size, + std::reference_wrapper device_id_ref, + cv::Rect &out_rect) { // Identify the central point & square size (- some padding) - const auto center = cv::Point{in_size.width/2, in_size.height/2}; - auto sqside = std::min(in_size.width, in_size.height); + // NB: GPU plugin in InferenceEngine doesn't support ROI at now + if (!is_gpu(device_id_ref.get())) { + const auto center = cv::Point{in_size.width/2, in_size.height/2}; + auto sqside = std::min(in_size.width, in_size.height); - // Now build the central square ROI - out_rect = cv::Rect{ center.x - sqside/2 - , center.y - sqside/2 - , sqside - , sqside - }; + // Now build the central square ROI + out_rect = cv::Rect{ center.x - sqside/2 + , center.y - sqside/2 + , sqside + , sqside + }; + } else { + // use whole frame for GPU device + out_rect = cv::Rect{ 0 + , 0 + , in_size.width + , in_size.height + }; + } } }; @@ -193,11 +211,14 @@ int main(int argc, char *argv[]) { } // get file name - std::string file_path = cmd.get("input"); - const std::string output = cmd.get("output"); + const auto file_path = cmd.get("input"); + const auto output = cmd.get("output"); const auto face_model_path = cmd.get("facem"); const auto streaming_queue_capacity = cmd.get("streaming_queue_capacity"); - const auto source_queue_capacity = cmd.get("frames_pool_size"); + const auto source_decode_queue_capacity = cmd.get("frames_pool_size"); + const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); + const auto vpl_source_preproc_enable = cmd.get("source_preproc_enable"); + const auto device_id = cmd.get("faced"); // check ouput file extension if (!output.empty()) { @@ -214,6 +235,12 @@ int main(int argc, char *argv[]) { try { std::string line; while 
(std::getline(params_list, line, ';')) { + if (vpl_source_preproc_enable == 0) { + if (line.find("vpp.") != std::string::npos) { + // skip VPP preprocessing primitives if not requested + continue; + } + } source_cfgs.push_back(cfg::create_from_string(line)); } } catch (const std::exception& ex) { @@ -221,11 +248,13 @@ int main(int argc, char *argv[]) { return -1; } - if (source_queue_capacity != 0) { - source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_queue_capacity)); + if (source_decode_queue_capacity != 0) { + source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_frames_pool_size(source_decode_queue_capacity)); + } + if (source_vpp_queue_capacity != 0) { + source_cfgs.push_back(cv::gapi::wip::onevpl::CfgParam::create_vpp_frames_pool_size(source_vpp_queue_capacity)); } - const std::string& device_id = cmd.get("faced"); auto face_net = cv::gapi::ie::Params { face_model_path, // path to topology IR get_weights_path(face_model_path), // path to weights @@ -247,7 +276,7 @@ int main(int argc, char *argv[]) { auto dx11_dev = createCOMPtrGuard(); auto dx11_ctx = createCOMPtrGuard(); - if (device_id.find("GPU") != std::string::npos) { + if (is_gpu(device_id)) { auto adapter_factory = createCOMPtrGuard(); { IDXGIFactory* out_factory = nullptr; @@ -294,11 +323,25 @@ int main(int argc, char *argv[]) { #endif // HAVE_D3D11 #endif // HAVE_DIRECTX // set ctx_config for GPU device only - no need in case of CPU device type - if (device_id.find("GPU") != std::string::npos) { + if (is_gpu(device_id)) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, {"VA_DEVICE", accel_device_ptr} }); face_net.cfgContextParams(ctx_config); + face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); + + std::cout <<"/*******************************************************/\n" + "ATTENTION: GPU Inference Engine preprocessing is not vital as expected!" 
+ " Please consider param \"source_preproc_enable=1\" and specify " + " appropriated media frame transformation using oneVPL::VPP primitives" + " which force onevpl::GSource to produce tranformed media frames." + " For exploring list of supported transformations please find out " + " vpp_* related stuff in" + " gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp" + " Pay attention that to obtain expected result In this case VPP " + " transformation must match network input params." + " Please vote/create issue about exporting network params using GAPI\n" + "/******************************************************/" << std::endl; } #endif // HAVE_INF_ENGINE @@ -314,7 +357,7 @@ int main(int argc, char *argv[]) { // Create source cv::Ptr cap; try { - if (device_id.find("GPU") != std::string::npos) { + if (is_gpu(device_id)) { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs, device_id, accel_device_ptr, @@ -334,7 +377,7 @@ int main(int argc, char *argv[]) { // Now build the graph cv::GFrame in; auto size = cv::gapi::streaming::size(in); - auto roi = custom::LocateROI::on(size); + auto roi = custom::LocateROI::on(size, std::cref(device_id)); auto blob = cv::gapi::infer(roi, in); cv::GArray rcs = cv::gapi::parseSSD(blob, size, 0.5f, true, true); auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi)); @@ -397,6 +440,8 @@ typename cv::gapi::wip::onevpl::CfgParam create_from_string(const std::string &l std::string name = line.substr(0, name_endline_pos); std::string value = line.substr(name_endline_pos + 1); - return cv::gapi::wip::onevpl::CfgParam::create(name, value); + return cv::gapi::wip::onevpl::CfgParam::create(name, value, + /* vpp params strongly optional */ + name.find("vpp.") == std::string::npos); } } diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp new file mode 100644 index 0000000000..ca6187e1ca --- /dev/null +++ 
b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include +#include + +#include +#include // cv::CommandLineParser +#include + +#if defined(_WIN32) +#include +#endif + +#include "pipeline_modeling_tool/dummy_source.hpp" +#include "pipeline_modeling_tool/utils.hpp" +#include "pipeline_modeling_tool/pipeline_builder.hpp" + +enum class AppMode { + REALTIME, + BENCHMARK +}; + +static AppMode strToAppMode(const std::string& mode_str) { + if (mode_str == "realtime") { + return AppMode::REALTIME; + } else if (mode_str == "benchmark") { + return AppMode::BENCHMARK; + } else { + throw std::logic_error("Unsupported AppMode: " + mode_str + + "\nPlease chose between: realtime and benchmark"); + } +} + +template +T read(const cv::FileNode& node) { + return static_cast(node); +} + +static cv::FileNode check_and_get_fn(const cv::FileNode& fn, + const std::string& field, + const std::string& uplvl) { + const bool is_map = fn.isMap(); + if (!is_map || fn[field].empty()) { + throw std::logic_error(uplvl + " must contain field: " + field); + } + return fn[field]; +} + +static cv::FileNode check_and_get_fn(const cv::FileStorage& fs, + const std::string& field, + const std::string& uplvl) { + auto fn = fs[field]; + if (fn.empty()) { + throw std::logic_error(uplvl + " must contain field: " + field); + } + return fn; +} + +template +T check_and_read(const FileT& f, + const std::string& field, + const std::string& uplvl) { + auto fn = check_and_get_fn(f, field, uplvl); + return read(fn); +} + +template +cv::optional readOpt(const cv::FileNode& fn) { + return fn.empty() ? 
cv::optional() : cv::optional(read(fn)); +} + +template +std::vector readList(const cv::FileNode& fn, + const std::string& field, + const std::string& uplvl) { + auto fn_field = check_and_get_fn(fn, field, uplvl); + if (!fn_field.isSeq()) { + throw std::logic_error(field + " in " + uplvl + " must be a sequence"); + } + + std::vector vec; + for (auto iter : fn_field) { + vec.push_back(read(iter)); + } + return vec; +} + +template +std::vector readVec(const cv::FileNode& fn, + const std::string& field, + const std::string& uplvl) { + auto fn_field = check_and_get_fn(fn, field, uplvl); + + std::vector vec; + fn_field >> vec; + return vec; +} + +static int strToPrecision(const std::string& precision) { + static std::unordered_map str_to_precision = { + {"U8", CV_8U}, {"FP32", CV_32F}, {"FP16", CV_16F} + }; + auto it = str_to_precision.find(precision); + if (it == str_to_precision.end()) { + throw std::logic_error("Unsupported precision: " + precision); + } + return it->second; +} + +template <> +OutputDescr read(const cv::FileNode& fn) { + auto dims = readVec(fn, "dims", "output"); + auto str_prec = check_and_read(fn, "precision", "output"); + return OutputDescr{dims, strToPrecision(str_prec)}; +} + +template <> +Edge read(const cv::FileNode& fn) { + auto from = check_and_read(fn, "from", "edge"); + auto to = check_and_read(fn, "to", "edge"); + + auto splitNameAndPort = [](const std::string& str) { + auto pos = str.find(':'); + auto name = + pos == std::string::npos ? str : std::string(str.c_str(), pos); + size_t port = + pos == std::string::npos ? 0 : std::atoi(str.c_str() + pos + 1); + return std::make_pair(name, port); + }; + + auto p1 = splitNameAndPort(from); + auto p2 = splitNameAndPort(to); + return Edge{Edge::P{p1.first, p1.second}, Edge::P{p2.first, p2.second}}; +} + +static std::string getModelsPath() { + static char* models_path_c = std::getenv("PIPELINE_MODELS_PATH"); + static std::string models_path = models_path_c ? 
models_path_c : "."; + return models_path; +} + +template <> +ModelPath read(const cv::FileNode& fn) { + using cv::utils::fs::join; + if (!fn["xml"].empty() && !fn["bin"].empty()) { + return ModelPath{LoadPath{join(getModelsPath(), fn["xml"].string()), + join(getModelsPath(), fn["bin"].string())}}; + } else if (!fn["blob"].empty()){ + return ModelPath{ImportPath{join(getModelsPath(), fn["blob"].string())}}; + } else { + const std::string emsg = R""""( + Path to OpenVINO model must be specified in either of two formats: +1. + xml: path to *.xml + bin: path to *.bin +2. + blob: path to *.blob + )""""; + throw std::logic_error(emsg); + } +} + +static PLMode strToPLMode(const std::string& mode_str) { + if (mode_str == "streaming") { + return PLMode::STREAMING; + } else if (mode_str == "regular") { + return PLMode::REGULAR; + } else { + throw std::logic_error("Unsupported PLMode: " + mode_str + + "\nPlease chose between: streaming and regular"); + } +} + +static std::vector parseExecList(const std::string& exec_list) { + std::vector pl_types; + std::stringstream ss(exec_list); + std::string pl_type; + while (getline(ss, pl_type, ',')) { + pl_types.push_back(pl_type); + } + return pl_types; +} + +static void loadConfig(const std::string& filename, + std::map& config) { + cv::FileStorage fs(filename, cv::FileStorage::READ); + if (!fs.isOpened()) { + throw std::runtime_error("Failed to load config: " + filename); + } + + cv::FileNode root = fs.root(); + for (auto it = root.begin(); it != root.end(); ++it) { + auto device = *it; + if (!device.isMap()) { + throw std::runtime_error("Failed to parse config: " + filename); + } + for (auto item : device) { + config.emplace(item.name(), item.string()); + } + } +} + +int main(int argc, char* argv[]) { +#if defined(_WIN32) + timeBeginPeriod(1); +#endif + try { + const std::string keys = + "{ h help | | Print this help message. }" + "{ cfg | | Path to the config which is either" + " YAML file or string. 
}" + "{ load_config | | Optional. Path to XML/YAML/JSON file" + " to load custom IE parameters. }" + "{ cache_dir | | Optional. Enables caching of loaded models" + " to specified directory. }" + "{ log_file | | Optional. If file is specified, app will" + " dump expanded execution information. }" + "{ pl_mode | streaming | Optional. Pipeline mode: streaming/regular" + " if it's specified will be applied for" + " every pipeline. }" + "{ qc | 1 | Optional. Calculated automatically by G-API" + " if set to 0. If it's specified will be" + " applied for every pipeline. }" + "{ app_mode | realtime | Application mode (realtime/benchmark). }" + "{ exec_list | | A comma-separated list of pipelines that" + " will be executed. Spaces around commas" + " are prohibited. }"; + + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const auto cfg = cmd.get("cfg"); + const auto load_config = cmd.get("load_config"); + const auto cached_dir = cmd.get("cache_dir"); + const auto log_file = cmd.get("log_file"); + const auto pl_mode = strToPLMode(cmd.get("pl_mode")); + const auto qc = cmd.get("qc"); + const auto app_mode = strToAppMode(cmd.get("app_mode")); + const auto exec_str = cmd.get("exec_list"); + + cv::FileStorage fs; + if (cfg.empty()) { + throw std::logic_error("Config must be specified via --cfg option"); + } + // NB: *.yml + if (cfg.size() < 5) { + throw std::logic_error("--cfg string must contain at least 5 symbols" + " to determine if it's a file (*.yml) a or string"); + } + if (cfg.substr(cfg.size() - 4, cfg.size()) == ".yml") { + if (!fs.open(cfg, cv::FileStorage::READ)) { + throw std::logic_error("Failed to open config file: " + cfg); + } + } else { + fs = cv::FileStorage(cfg, cv::FileStorage::FORMAT_YAML | + cv::FileStorage::MEMORY); + } + + std::map config; + if (!load_config.empty()) { + loadConfig(load_config, config); + } + // NB: Takes priority over config from file + if (!cached_dir.empty()) { + config = + 
std::map{{"CACHE_DIR", cached_dir}}; + } + + const double work_time_ms = + check_and_read(fs, "work_time", "Config"); + if (work_time_ms < 0) { + throw std::logic_error("work_time must be positive"); + } + + auto pipelines_fn = check_and_get_fn(fs, "Pipelines", "Config"); + if (!pipelines_fn.isMap()) { + throw std::logic_error("Pipelines field must be a map"); + } + + auto exec_list = !exec_str.empty() ? parseExecList(exec_str) + : pipelines_fn.keys(); + + + std::vector pipelines; + pipelines.reserve(exec_list.size()); + // NB: Build pipelines based on config information + PipelineBuilder builder; + for (const auto& name : exec_list) { + const auto& pl_fn = check_and_get_fn(pipelines_fn, name, "Pipelines"); + builder.setName(name); + // NB: Set source + { + const auto& src_fn = check_and_get_fn(pl_fn, "source", name); + auto src_name = + check_and_read(src_fn, "name", "source"); + auto latency = + check_and_read(src_fn, "latency", "source"); + auto output = + check_and_read(src_fn, "output", "source"); + // NB: In case BENCHMARK mode sources work with zero latency. 
+ if (app_mode == AppMode::BENCHMARK) { + latency = 0.0; + } + builder.setSource(src_name, latency, output); + } + + const auto& nodes_fn = check_and_get_fn(pl_fn, "nodes", name); + if (!nodes_fn.isSeq()) { + throw std::logic_error("nodes in " + name + " must be a sequence"); + } + for (auto node_fn : nodes_fn) { + auto node_name = + check_and_read(node_fn, "name", "node"); + auto node_type = + check_and_read(node_fn, "type", "node"); + if (node_type == "Dummy") { + auto time = + check_and_read(node_fn, "time", node_name); + if (time < 0) { + throw std::logic_error(node_name + " time must be positive"); + } + auto output = + check_and_read(node_fn, "output", node_name); + builder.addDummy(node_name, time, output); + } else if (node_type == "Infer") { + InferParams params; + params.path = read(node_fn); + params.device = + check_and_read(node_fn, "device", node_name); + params.input_layers = + readList(node_fn, "input_layers", node_name); + params.output_layers = + readList(node_fn, "output_layers", node_name); + params.config = config; + builder.addInfer(node_name, params); + } else { + throw std::logic_error("Unsupported node type: " + node_type); + } + } + + const auto edges_fn = check_and_get_fn(pl_fn, "edges", name); + if (!edges_fn.isSeq()) { + throw std::logic_error("edges in " + name + " must be a sequence"); + } + for (auto edge_fn : edges_fn) { + auto edge = read(edge_fn); + builder.addEdge(edge); + } + + // NB: Pipeline mode from config takes priority over cmd. + auto mode = readOpt(pl_fn["mode"]); + builder.setMode(mode.has_value() ? strToPLMode(mode.value()) : pl_mode); + + // NB: Queue capacity from config takes priority over cmd. + auto config_qc = readOpt(pl_fn["queue_capacity"]); + auto queue_capacity = config_qc.has_value() ? config_qc.value() : qc; + // NB: 0 is special constant that means + // queue capacity should be calculated automatically. 
+ if (queue_capacity != 0) { + builder.setQueueCapacity(queue_capacity); + } + + auto dump = readOpt(pl_fn["dump"]); + if (dump) { + builder.setDumpFilePath(dump.value()); + } + + pipelines.emplace_back(builder.build()); + } + + // NB: Compille pipelines + for (size_t i = 0; i < pipelines.size(); ++i) { + pipelines[i]->compile(); + } + + // NB: Execute pipelines + std::vector eptrs(pipelines.size(), nullptr); + std::vector threads(pipelines.size()); + for (size_t i = 0; i < pipelines.size(); ++i) { + threads[i] = std::thread([&, i]() { + try { + pipelines[i]->run(work_time_ms); + } catch (...) { + eptrs[i] = std::current_exception(); + } + }); + } + + std::ofstream file; + if (!log_file.empty()) { + file.open(log_file); + } + + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + + for (size_t i = 0; i < threads.size(); ++i) { + if (eptrs[i] != nullptr) { + try { + std::rethrow_exception(eptrs[i]); + } catch (std::exception& e) { + throw std::logic_error(pipelines[i]->name() + " failed: " + e.what()); + } + } + if (file.is_open()) { + file << pipelines[i]->report().toStr(true) << std::endl; + } + std::cout << pipelines[i]->report().toStr() << std::endl; + } + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + throw; + } + return 0; +} diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp new file mode 100644 index 0000000000..1514eb2671 --- /dev/null +++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp @@ -0,0 +1,63 @@ +#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP +#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP + +#include +#include +#include + +#include +#include // cv::gapi::wip::IStreamSource + +#include "utils.hpp" + +class DummySource final: public cv::gapi::wip::IStreamSource { +public: + using Ptr = std::shared_ptr; + DummySource(const double latency, + const OutputDescr& output); + bool 
pull(cv::gapi::wip::Data& data) override; + cv::GMetaArg descr_of() const override; + +private: + double m_latency; + cv::Mat m_mat; + using TimePoint = + std::chrono::time_point; + cv::optional m_prev_pull_tp; +}; + +DummySource::DummySource(const double latency, + const OutputDescr& output) + : m_latency(latency) { + utils::createNDMat(m_mat, output.dims, output.precision); + utils::generateRandom(m_mat); +} + +bool DummySource::pull(cv::gapi::wip::Data& data) { + using namespace std::chrono; + using namespace cv::gapi::streaming; + // NB: In case it's the first pull. + if (!m_prev_pull_tp) { + m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + } + // NB: Just increase reference counter not to release mat memory + // after assigning it to the data. + cv::Mat mat = m_mat; + auto end = high_resolution_clock::now(); + auto elapsed = + duration_cast>(end - *m_prev_pull_tp).count(); + auto delta = m_latency - elapsed; + if (delta > 0) { + utils::sleep(delta); + } + data.meta[meta_tag::timestamp] = int64_t{utils::timestamp()}; + data = mat; + m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + return true; +} + +cv::GMetaArg DummySource::descr_of() const { + return cv::GMetaArg{cv::descr_of(m_mat)}; +} + +#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_DUMMY_SOURCE_HPP diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp new file mode 100644 index 0000000000..2951d45610 --- /dev/null +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp @@ -0,0 +1,205 @@ +#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP +#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP + +struct PerfReport { + std::string name; + double avg_latency = 0.0; + double throughput = 0.0; + int64_t first_run_latency = 0; + int64_t elapsed = 0; + int64_t compilation_time = 0; + std::vector latencies; + + std::string toStr(bool expanded = false) const; +}; + +std::string 
PerfReport::toStr(bool expand) const { + std::stringstream ss; + ss << name << ": Compilation time: " << compilation_time << " ms; " + << "Average latency: " << avg_latency << " ms; Throughput: " + << throughput << " FPS; First latency: " + << first_run_latency << " ms"; + + if (expand) { + ss << "\nTotal processed frames: " << latencies.size() + << "\nTotal elapsed time: " << elapsed << " ms" << std::endl; + for (size_t i = 0; i < latencies.size(); ++i) { + ss << std::endl; + ss << "Frame:" << i << "\nLatency: " + << latencies[i] << " ms"; + } + } + + return ss.str(); +} + +class Pipeline { +public: + using Ptr = std::shared_ptr; + + Pipeline(std::string&& name, + cv::GComputation&& comp, + cv::gapi::wip::IStreamSource::Ptr&& src, + cv::GCompileArgs&& args, + const size_t num_outputs); + + void compile(); + void run(double work_time_ms); + const PerfReport& report() const; + const std::string& name() const { return m_name;} + + virtual ~Pipeline() = default; + +protected: + struct RunPerf { + int64_t elapsed = 0; + std::vector latencies; + }; + + virtual void _compile() = 0; + virtual RunPerf _run(double work_time_ms) = 0; + + std::string m_name; + cv::GComputation m_comp; + cv::gapi::wip::IStreamSource::Ptr m_src; + cv::GCompileArgs m_args; + size_t m_num_outputs; + PerfReport m_perf; +}; + +Pipeline::Pipeline(std::string&& name, + cv::GComputation&& comp, + cv::gapi::wip::IStreamSource::Ptr&& src, + cv::GCompileArgs&& args, + const size_t num_outputs) + : m_name(std::move(name)), + m_comp(std::move(comp)), + m_src(std::move(src)), + m_args(std::move(args)), + m_num_outputs(num_outputs) { + m_perf.name = m_name; +} + +void Pipeline::compile() { + m_perf.compilation_time = + utils::measure([this]() { + _compile(); + }); +} + +void Pipeline::run(double work_time_ms) { + auto run_perf = _run(work_time_ms); + + m_perf.elapsed = run_perf.elapsed; + m_perf.latencies = std::move(run_perf.latencies); + + m_perf.avg_latency = + std::accumulate(m_perf.latencies.begin(), + 
m_perf.latencies.end(), + 0.0) / static_cast(m_perf.latencies.size()); + m_perf.throughput = + (m_perf.latencies.size() / static_cast(m_perf.elapsed)) * 1000; + + m_perf.first_run_latency = m_perf.latencies[0]; +} + +const PerfReport& Pipeline::report() const { + return m_perf; +} + +class StreamingPipeline : public Pipeline { +public: + using Pipeline::Pipeline; + +private: + void _compile() override { + m_compiled = + m_comp.compileStreaming({m_src->descr_of()}, + cv::GCompileArgs(m_args)); + } + + Pipeline::RunPerf _run(double work_time_ms) override { + // NB: Setup. + using namespace std::chrono; + // NB: N-1 buffers + timestamp. + std::vector out_mats(m_num_outputs - 1); + int64_t start_ts = -1; + cv::GRunArgsP pipeline_outputs; + for (auto& m : out_mats) { + pipeline_outputs += cv::gout(m); + } + pipeline_outputs += cv::gout(start_ts); + m_compiled.setSource(m_src); + + // NB: Start execution & measure performance statistics. + Pipeline::RunPerf perf; + auto start = high_resolution_clock::now(); + m_compiled.start(); + while (m_compiled.pull(cv::GRunArgsP{pipeline_outputs})) { + int64_t latency = utils::timestamp() - start_ts; + + perf.latencies.push_back(latency); + perf.elapsed = duration_cast( + high_resolution_clock::now() - start).count(); + + if (perf.elapsed >= work_time_ms) { + m_compiled.stop(); + break; + } + }; + return perf; + } + + cv::GStreamingCompiled m_compiled; +}; + +class RegularPipeline : public Pipeline { +public: + using Pipeline::Pipeline; + +private: + void _compile() override { + m_compiled = + m_comp.compile({m_src->descr_of()}, + cv::GCompileArgs(m_args)); + } + + Pipeline::RunPerf _run(double work_time_ms) override { + // NB: Setup + using namespace std::chrono; + cv::gapi::wip::Data d; + std::vector out_mats(m_num_outputs); + cv::GRunArgsP pipeline_outputs; + for (auto& m : out_mats) { + pipeline_outputs += cv::gout(m); + } + + // NB: Start execution & measure performance statistics. 
+ Pipeline::RunPerf perf; + auto start = high_resolution_clock::now(); + while (m_src->pull(d)) { + auto in_mat = cv::util::get(d); + int64_t latency = utils::measure([&]{ + m_compiled(cv::gin(in_mat), cv::GRunArgsP{pipeline_outputs}); + }); + + perf.latencies.push_back(latency); + perf.elapsed = duration_cast( + high_resolution_clock::now() - start).count(); + + if (perf.elapsed >= work_time_ms) { + break; + } + }; + return perf; + } + + cv::GCompiled m_compiled; +}; + +enum class PLMode { + REGULAR, + STREAMING +}; + +#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_HPP diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp new file mode 100644 index 0000000000..63ada28603 --- /dev/null +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -0,0 +1,502 @@ +#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP +#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP + +#include + +#include // cv::gapi::GNetPackage +#include // cv::gapi::wip::IStreamSource +#include // cv::gapi::ie::Params +#include // cv::gapi::GCompileArgs +#include // GAPI_OCV_KERNEL +#include // G_API_OP + +#include "pipeline.hpp" +#include "utils.hpp" + +struct Edge { + struct P { + std::string name; + size_t port; + }; + + P src; + P dst; +}; + +struct CallNode { + using F = std::function; + + std::string name; + F run; +}; + +struct DataNode { + cv::optional arg; +}; + +struct Node { + using Ptr = std::shared_ptr; + using WPtr = std::weak_ptr; + using Kind = cv::util::variant; + + std::vector in_nodes; + std::vector out_nodes; + Kind kind; +}; + +struct DummyCall { + G_API_OP(GDummy, + , + "custom.dummy") { + static cv::GMatDesc outMeta(const cv::GMatDesc& /* in */, + double /* time */, + const OutputDescr& output) { + if (output.dims.size() == 2) { + return cv::GMatDesc(output.precision, + 1, + cv::Size(output.dims[0], output.dims[1])); + } + return 
cv::GMatDesc(output.precision, output.dims); + } + }; + + struct DummyState { + cv::Mat mat; + }; + + // NB: Generate random mat once and then + // copy to dst buffer on every iteration. + GAPI_OCV_KERNEL_ST(GCPUDummy, GDummy, DummyState) { + static void setup(const cv::GMatDesc& /*in*/, + double /*time*/, + const OutputDescr& output, + std::shared_ptr& state, + const cv::GCompileArgs& /*args*/) { + state.reset(new DummyState{}); + utils::createNDMat(state->mat, output.dims, output.precision); + utils::generateRandom(state->mat); + } + + static void run(const cv::Mat& /*in_mat*/, + double time, + const OutputDescr& /*output*/, + cv::Mat& out_mat, + DummyState& state) { + using namespace std::chrono; + double total = 0; + auto start = high_resolution_clock::now(); + state.mat.copyTo(out_mat); + while (total < time) { + total = duration_cast>( + high_resolution_clock::now() - start).count(); + } + } + }; + + void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs); + + size_t numInputs() const { return 1; } + size_t numOutputs() const { return 1; } + + double time; + OutputDescr output; +}; + +void DummyCall::operator()(const cv::GProtoArgs& inputs, + cv::GProtoArgs& outputs) { + GAPI_Assert(inputs.size() == 1u); + GAPI_Assert(cv::util::holds_alternative(inputs[0])); + GAPI_Assert(outputs.empty()); + auto in = cv::util::get(inputs[0]); + outputs.emplace_back(GDummy::on(in, time, output)); +} + +struct InferCall { + void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs); + size_t numInputs() const { return input_layers.size(); } + size_t numOutputs() const { return output_layers.size(); } + + std::string tag; + std::vector input_layers; + std::vector output_layers; +}; + +void InferCall::operator()(const cv::GProtoArgs& inputs, + cv::GProtoArgs& outputs) { + GAPI_Assert(inputs.size() == input_layers.size()); + GAPI_Assert(outputs.empty()); + + cv::GInferInputs g_inputs; + // TODO: Add an opportunity not specify input/output layers in case 
+ // there is only single layer. + for (size_t i = 0; i < inputs.size(); ++i) { + // TODO: Support GFrame as well. + GAPI_Assert(cv::util::holds_alternative(inputs[i])); + auto in = cv::util::get(inputs[i]); + g_inputs[input_layers[i]] = in; + } + auto g_outputs = cv::gapi::infer(tag, g_inputs); + for (size_t i = 0; i < output_layers.size(); ++i) { + outputs.emplace_back(g_outputs.at(output_layers[i])); + } +} + +struct SourceCall { + void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs); + size_t numInputs() const { return 0; } + size_t numOutputs() const { return 1; } +}; + +void SourceCall::operator()(const cv::GProtoArgs& inputs, + cv::GProtoArgs& outputs) { + GAPI_Assert(inputs.empty()); + GAPI_Assert(outputs.empty()); + // NB: Since NV12 isn't exposed source always produce GMat. + outputs.emplace_back(cv::GMat()); +} + +struct LoadPath { + std::string xml; + std::string bin; +}; + +struct ImportPath { + std::string blob; +}; + +using ModelPath = cv::util::variant; + +struct InferParams { + std::string name; + ModelPath path; + std::string device; + std::vector input_layers; + std::vector output_layers; + std::map config; +}; + +class PipelineBuilder { +public: + PipelineBuilder(); + void addDummy(const std::string& name, + const double time, + const OutputDescr& output); + + void addInfer(const std::string& name, const InferParams& params); + + void setSource(const std::string& name, + double latency, + const OutputDescr& output); + + void addEdge(const Edge& edge); + void setMode(PLMode mode); + void setDumpFilePath(const std::string& dump); + void setQueueCapacity(const size_t qc); + void setName(const std::string& name); + + Pipeline::Ptr build(); + +private: + template + void addCall(const std::string& name, + CallT&& call); + + Pipeline::Ptr construct(); + + template + using M = std::unordered_map; + struct State { + struct NodeEdges { + std::vector input_edges; + std::vector output_edges; + }; + + M calls_map; + std::vector all_calls; + 
+ cv::gapi::GNetPackage networks; + cv::gapi::GKernelPackage kernels; + cv::GCompileArgs compile_args; + cv::gapi::wip::IStreamSource::Ptr src; + PLMode mode = PLMode::STREAMING; + std::string name; + }; + + std::unique_ptr m_state; +}; + +PipelineBuilder::PipelineBuilder() : m_state(new State{}) { }; + +void PipelineBuilder::addDummy(const std::string& name, + const double time, + const OutputDescr& output) { + m_state->kernels.include(); + addCall(name, DummyCall{time, output}); +} + +template +void PipelineBuilder::addCall(const std::string& name, + CallT&& call) { + + size_t num_inputs = call.numInputs(); + size_t num_outputs = call.numOutputs(); + Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{name, std::move(call)}}}); + // NB: Create placeholders for inputs. + call_node->in_nodes.resize(num_inputs); + // NB: Create outputs with empty data. + for (size_t i = 0; i < num_outputs; ++i) { + call_node->out_nodes.emplace_back(new Node{{call_node}, + {}, + Node::Kind{DataNode{}}}); + } + + auto it = m_state->calls_map.find(name); + if (it != m_state->calls_map.end()) { + throw std::logic_error("Node: " + name + " already exists!"); + } + m_state->calls_map.emplace(name, call_node); + m_state->all_calls.emplace_back(call_node); +} + +void PipelineBuilder::addInfer(const std::string& name, + const InferParams& params) { + // NB: No default ctor for Params. 
+ std::unique_ptr> pp; + if (cv::util::holds_alternative(params.path)) { + auto load_path = cv::util::get(params.path); + pp.reset(new cv::gapi::ie::Params(name, + load_path.xml, + load_path.bin, + params.device)); + } else { + GAPI_Assert(cv::util::holds_alternative(params.path)); + auto import_path = cv::util::get(params.path); + pp.reset(new cv::gapi::ie::Params(name, + import_path.blob, + params.device)); + } + + pp->pluginConfig(params.config); + m_state->networks += cv::gapi::networks(*pp); + + addCall(name, InferCall{name, params.input_layers, params.output_layers}); +} + +void PipelineBuilder::addEdge(const Edge& edge) { + const auto& src_it = m_state->calls_map.find(edge.src.name); + if (src_it == m_state->calls_map.end()) { + throw std::logic_error("Failed to find node: " + edge.src.name); + } + auto src_node = src_it->second; + if (src_node->out_nodes.size() <= edge.src.port) { + throw std::logic_error("Failed to access node: " + edge.src.name + + " by out port: " + std::to_string(edge.src.port)); + } + + auto dst_it = m_state->calls_map.find(edge.dst.name); + if (dst_it == m_state->calls_map.end()) { + throw std::logic_error("Failed to find node: " + edge.dst.name); + } + auto dst_node = dst_it->second; + if (dst_node->in_nodes.size() <= edge.dst.port) { + throw std::logic_error("Failed to access node: " + edge.dst.name + + " by in port: " + std::to_string(edge.dst.port)); + } + + auto out_data = src_node->out_nodes[edge.src.port]; + auto& in_data = dst_node->in_nodes[edge.dst.port]; + // NB: in_data != nullptr. 
+ if (!in_data.expired()) { + throw std::logic_error("Node: " + edge.dst.name + + " already connected by in port: " + + std::to_string(edge.dst.port)); + } + dst_node->in_nodes[edge.dst.port] = out_data; + out_data->out_nodes.push_back(dst_node); +} + +void PipelineBuilder::setSource(const std::string& name, + double latency, + const OutputDescr& output) { + GAPI_Assert(!m_state->src); + m_state->src = std::make_shared(latency, output); + addCall(name, SourceCall{}); +} + +void PipelineBuilder::setMode(PLMode mode) { + m_state->mode = mode; +} + +void PipelineBuilder::setDumpFilePath(const std::string& dump) { + m_state->compile_args.emplace_back(cv::graph_dump_path{dump}); +} + +void PipelineBuilder::setQueueCapacity(const size_t qc) { + m_state->compile_args.emplace_back(cv::gapi::streaming::queue_capacity{qc}); +} + +void PipelineBuilder::setName(const std::string& name) { + m_state->name = name; +} + +static bool visit(Node::Ptr node, + std::vector& sorted, + std::unordered_map& visited) { + if (!node) { + throw std::logic_error("Found null node"); + } + + visited[node] = 1; + for (auto in : node->in_nodes) { + auto in_node = in.lock(); + if (visited[in_node] == 0) { + if (visit(in_node, sorted, visited)) { + return true; + } + } else if (visited[in_node] == 1) { + return true; + } + } + visited[node] = 2; + sorted.push_back(node); + return false; +} + +static cv::optional> +toposort(const std::vector nodes) { + std::vector sorted; + std::unordered_map visited; + for (auto n : nodes) { + if (visit(n, sorted, visited)) { + return cv::optional>{}; + } + } + return cv::util::make_optional(sorted); +} + +Pipeline::Ptr PipelineBuilder::construct() { + // NB: Unlike G-API, pipeline_builder_tool graph always starts with CALL node + // (not data) that produce datas, so the call node which doesn't have + // inputs is considered as "producer" node. + // + // Graph always starts with CALL node and ends with DATA node. 
+ // Graph example: [source] -> (source:0) -> [PP] -> (PP:0) + // + // The algorithm is quite simple: + // 0. Verify that every call input node exists (connected). + // 1. Sort all nodes by visiting only call nodes, + // since there is no data nodes that's not connected with any call node, + // it's guarantee that every node will be visited. + // 2. Fillter call nodes. + // 3. Go through every call node. + // FIXME: Add toposort in case user passed nodes + // in arbitrary order which is unlikely happened. + // 4. Extract proto input from every input node + // 5. Run call and get outputs + // 6. If call node doesn't have inputs it means that it's "producer" node, + // so collect all outputs to graph_inputs vector. + // 7. Assign proto outputs to output data nodes, + // so the next calls can use them as inputs. + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; + // 0. Verify that every call input node exists (connected). + for (auto call_node : m_state->all_calls) { + for (size_t i = 0; i < call_node->in_nodes.size(); ++i) { + const auto& in_data_node = call_node->in_nodes[i]; + // NB: in_data_node == nullptr. + if (in_data_node.expired()) { + const auto& call = cv::util::get(call_node->kind); + throw std::logic_error( + "Node: " + call.name + " in Pipeline: " + m_state->name + + " has dangling input by in port: " + std::to_string(i)); + } + } + } + // (0) Sort all nodes; + auto has_sorted = toposort(m_state->all_calls); + if (!has_sorted) { + throw std::logic_error( + "Pipeline: " + m_state->name + " has cyclic dependencies") ; + } + auto& sorted = has_sorted.value(); + // (1). Fillter call nodes. + std::vector sorted_calls; + for (auto n : sorted) { + if (cv::util::holds_alternative(n->kind)) { + sorted_calls.push_back(n); + } + } + // (2). Go through every call node. 
+ for (auto call_node : sorted_calls) { + cv::GProtoArgs outputs; + cv::GProtoArgs inputs; + for (size_t i = 0; i < call_node->in_nodes.size(); ++i) { + auto in_node = call_node->in_nodes.at(i); + auto in_data = cv::util::get(in_node.lock()->kind); + if (!in_data.arg.has_value()) { + throw std::logic_error("data hasn't been provided"); + } + // (3). Extract proto input from every input node. + inputs.push_back(in_data.arg.value()); + } + // (4). Run call and get outputs. + auto call = cv::util::get(call_node->kind); + call.run(inputs, outputs); + // (5) If call node doesn't have inputs + // it means that it's input producer node (Source). + if (call_node->in_nodes.empty()) { + for (auto out : outputs) { + graph_inputs.push_back(out); + } + } + // (6). Assign proto outputs to output data nodes, + // so the next calls can use them as inputs. + GAPI_Assert(outputs.size() == call_node->out_nodes.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + auto out_node = call_node->out_nodes[i]; + auto& out_data = cv::util::get(out_node->kind); + out_data.arg = cv::util::make_optional(outputs[i]); + if (out_node->out_nodes.empty()) { + graph_outputs.push_back(out_data.arg.value()); + } + } + } + + m_state->compile_args.emplace_back(m_state->networks); + m_state->compile_args.emplace_back(m_state->kernels); + + if (m_state->mode == PLMode::STREAMING) { + GAPI_Assert(graph_inputs.size() == 1); + GAPI_Assert(cv::util::holds_alternative(graph_inputs[0])); + // FIXME: Handle GFrame when NV12 comes. + const auto& graph_input = cv::util::get(graph_inputs[0]); + // NB: In case streaming mode need to expose timestamp in order to + // calculate performance metrics. 
+ graph_outputs.emplace_back( + cv::gapi::streaming::timestamp(graph_input).strip()); + + return std::make_shared(std::move(m_state->name), + cv::GComputation( + cv::GProtoInputArgs{graph_inputs}, + cv::GProtoOutputArgs{graph_outputs}), + std::move(m_state->src), + std::move(m_state->compile_args), + graph_outputs.size()); + } + GAPI_Assert(m_state->mode == PLMode::REGULAR); + return std::make_shared(std::move(m_state->name), + cv::GComputation( + cv::GProtoInputArgs{graph_inputs}, + cv::GProtoOutputArgs{graph_outputs}), + std::move(m_state->src), + std::move(m_state->compile_args), + graph_outputs.size()); +} + +Pipeline::Ptr PipelineBuilder::build() { + auto pipeline = construct(); + m_state.reset(new State{}); + return pipeline; +} + +#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_PIPELINE_BUILDER_HPP diff --git a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py new file mode 100644 index 0000000000..ef4bce6476 --- /dev/null +++ b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py @@ -0,0 +1,931 @@ +import os +import subprocess + +pipeline_modeling_tool = os.getenv('PIPELINE_MODELING_TOOL') + +def get_output(exec_str): + try: + out = subprocess.check_output(exec_str, + stderr=subprocess.STDOUT, + shell=True).strip().decode() + except subprocess.CalledProcessError as exc: + out = exc.output.strip().decode() + return out + + +def test_error_no_config_specified(): + out = get_output(pipeline_modeling_tool) + assert out.startswith('Config must be specified via --cfg option') + + +def test_error_no_config_exists(): + cfg_file = 'not_existing_cfg.yml' + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert 'Failed to open config file: not_existing_cfg.yml' in out + + +def test_error_no_work_time(): + cfg_file = """\"%YAML:1.0\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, 
cfg_file) + out = get_output(exec_str) + assert out.startswith('Config must contain field: work_time') + + +def test_error_work_time_not_positive(): + cfg_file = """\"%YAML:1.0 +work_time: -1\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('work_time must be positive') + + +def test_error_no_pipelines(): + cfg_file = """\"%YAML:1.0 +work_time: 1000\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Config must contain field: Pipelines') + + +def test_error_pipelines_node_not_map(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Pipelines field must be a map') + + +def test_error_config_not_contain_pl(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1:\" """ + + exec_str = '{} --cfg={} --exec_list=PL2'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Pipelines must contain field: PL2') + + +def test_error_no_source(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('PL1 must contain field: source') + + +def test_error_source_no_name(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('source must contain field: name') + + +def test_error_source_no_latency(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('source must contain field: latency') + + +def 
test_error_source_no_output(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('source must contain field: output') + + +def test_error_source_output_no_dims(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('output must contain field: dims') + + +def test_error_source_output_no_precision(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4]\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('output must contain field: precision') + + +def test_error_no_nodes(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('PL1 must contain field: nodes') + + +def test_error_nodes_not_sequence(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('nodes in PL1 must be a sequence') + + +def test_error_node_no_name(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + -\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert 
out.startswith('node must contain field: name') + + +def test_error_node_no_type(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('node must contain field: type') + + +def test_error_node_unknown_type(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Unknown'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Unsupported node type: Unknown') + + +def test_error_node_dummy_no_time(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Node0 must contain field: time') + + +def test_error_node_dummy_not_positive_time(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: -0.2\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Node0 time must be positive') + + +def test_error_node_dummy_no_output(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert 
out.startswith('Node0 must contain field: output') + + +def test_error_node_infer_no_model_path(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + error_msg = """Path to OpenVINO model must be specified in either of two formats: +1. + xml: path to *.xml + bin: path to *.bin +2. + blob: path to *.blob""" + assert out.startswith(error_msg) + + +def test_error_node_infer_no_input_layers(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer' + blob: model.blob + device: 'CPU'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Node0 must contain field: input_layers') + + +def test_error_node_infer_input_layers_are_empty(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer' + blob: model.blob + device: 'CPU' + input_layers: + \" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('input_layers in Node0 must be a sequence') + + +def test_error_node_infer_no_output_layers(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer' + blob: model.blob + device: 'CPU' + input_layers: + - 'layer_name'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Node0 must contain field: 
output_layers') + + +def test_error_node_infer_output_layers_are_empty(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer' + blob: model.blob + device: 'CPU' + input_layers: + - 'layer_name' + output_layers:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('output_layers in Node0 must be a sequence') + + +def test_error_no_edges(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('PL1 must contain field: edges') + + +def test_error_edges_not_sequence(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges:\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('edges in PL1 must be a sequence') + + +def test_error_edges_no_from(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + -\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('edge must contain field: from') + + +def test_error_edges_no_to(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: 
+ source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Node0'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('edge must contain field: to') + + +def test_error_edges_from_not_exists(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Node1' + to: 'Node2'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Failed to find node: Node1') + + +def test_error_edges_from_port_not_exists(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Node0:10' + to: 'Node2'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Failed to access node: Node0 by out port: 10') + + +def test_error_edges_to_not_exists(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node2'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Failed to find node: Node2') + + +def test_error_edges_to_port_not_exists(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + 
source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0:3'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Failed to access node: Node0 by in port: 3') + + +def test_error_connect_to_source(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Node0' + to: 'Src'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Failed to access node: Src by in port: 0') + + +def test_error_double_edge(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0' + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Node: Node0 already connected by in port: 0') + + +def test_error_double_edge(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0' + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Node: Node0 already connected by in port: 0') + + +def 
test_node_has_dangling_input(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + + - name: 'Node1' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Node0' + to: 'Node1'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + + assert out.startswith('Node: Node0 in Pipeline: PL1 has dangling input by in port: 0') + + +def test_error_has_cycle_0(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node' + type: 'Infer' + blob: 'model.blob' + device: 'CPU' + input_layers: + - 'in_layer_name_0' + - 'in_layer_name_1' + output_layers: + - 'out_layer_name' + edges: + - from: 'Src' + to: 'Node:0' + - from: 'Node:0' + to: 'Node:1'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Pipeline: PL1 has cyclic dependencies') + + +def test_error_has_cycle_0(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Infer' + blob: 'model.blob' + device: 'CPU' + input_layers: + - 'in_layer_name_0' + - 'in_layer_name_1' + output_layers: + - 'out_layer_name' + + - name: 'Node1' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0:0' + - from: 'Node0:0' + to: 'Node1:0' + - from: 'Node1' + to: 'Node0:1'\" """ + + exec_str = '{} --cfg={}'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Pipeline: PL1 has cyclic dependencies') + + +def test_error_no_load_config_exists(): + 
cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={} --load_config=not_existing.yml'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert 'Failed to load config: not_existing.yml' in out + + +def test_error_invalid_app_mode(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={} --pl_mode=unknown'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Unsupported PLMode: unknown\n' + 'Please chose between: streaming and regular') + + +def test_error_invalid_pl_mode(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: +PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={} --app_mode=unknown'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('Unsupported AppMode: unknown\n' + 'Please chose between: realtime and benchmark') diff --git a/modules/gapi/samples/pipeline_modeling_tool/utils.hpp b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp new file mode 100644 index 0000000000..c110bf3b47 --- /dev/null +++ b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp @@ -0,0 +1,96 @@ +#ifndef OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP +#define OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP + +#include + +#if defined(_WIN32) 
+#include +#endif + +// FIXME: It's better to place it somewhere in common.hpp +struct OutputDescr { + std::vector dims; + int precision; +}; + +namespace utils { + +inline void createNDMat(cv::Mat& mat, const std::vector& dims, int depth) { + GAPI_Assert(!dims.empty()); + mat.create(dims, depth); + if (dims.size() == 1) { + //FIXME: Well-known 1D mat WA + mat.dims = 1; + } +} + +inline void generateRandom(cv::Mat& out) { + switch (out.depth()) { + case CV_8U: + cv::randu(out, 0, 255); + break; + case CV_32F: + cv::randu(out, 0.f, 1.f); + break; + case CV_16F: { + std::vector dims; + for (int i = 0; i < out.size.dims(); ++i) { + dims.push_back(out.size[i]); + } + cv::Mat fp32_mat; + createNDMat(fp32_mat, dims, CV_32F); + cv::randu(fp32_mat, 0.f, 1.f); + fp32_mat.convertTo(out, out.type()); + break; + } + default: + throw std::logic_error("Unsupported preprocessing depth"); + } +} + +inline void sleep(double ms) { +#if defined(_WIN32) + // NB: It takes portions of 100 nanoseconds. + int64_t ns_units = static_cast(ms * 1e4); + // FIXME: Wrap it to RAII and instance only once. 
+ HANDLE timer = CreateWaitableTimer(NULL, true, NULL); + if (!timer) { + throw std::logic_error("Failed to create timer"); + } + + LARGE_INTEGER li; + li.QuadPart = -ns_units; + if(!SetWaitableTimer(timer, &li, 0, NULL, NULL, false)){ + CloseHandle(timer); + throw std::logic_error("Failed to set timer"); + } + if (WaitForSingleObject(timer, INFINITE) != WAIT_OBJECT_0) { + CloseHandle(timer); + throw std::logic_error("Failed to wait timer"); + } + CloseHandle(timer); +#else + using namespace std::chrono; + std::this_thread::sleep_for(duration(ms)); +#endif +} + +template +typename duration_t::rep measure(std::function f) { + using namespace std::chrono; + auto start = high_resolution_clock::now(); + f(); + return duration_cast( + high_resolution_clock::now() - start).count(); +} + +template +typename duration_t::rep timestamp() { + using namespace std::chrono; + auto now = high_resolution_clock::now(); + return duration_cast(now.time_since_epoch()).count(); +} + +} // namespace utils + +#endif // OPENCV_GAPI_PIPELINE_MODELING_TOOL_UTILS_HPP diff --git a/modules/gapi/src/api/gframe.cpp b/modules/gapi/src/api/gframe.cpp index 1acaa9b766..b0830b7a63 100644 --- a/modules/gapi/src/api/gframe.cpp +++ b/modules/gapi/src/api/gframe.cpp @@ -44,6 +44,7 @@ std::ostream& operator<<(std::ostream& os, const cv::GFrameDesc &d) { switch (d.fmt) { case MediaFormat::BGR: os << "BGR"; break; case MediaFormat::NV12: os << "NV12"; break; + case MediaFormat::GRAY: os << "GRAY"; break; default: GAPI_Assert(false && "Invalid media format"); } os << ' ' << d.size << ']'; diff --git a/modules/gapi/src/api/media.cpp b/modules/gapi/src/api/media.cpp index b1c455d40a..a3643e378c 100644 --- a/modules/gapi/src/api/media.cpp +++ b/modules/gapi/src/api/media.cpp @@ -36,7 +36,7 @@ cv::MediaFrame::IAdapter* cv::MediaFrame::getAdapter() const { } void cv::MediaFrame::serialize(cv::gapi::s11n::IOStream& os) const { - return m->adapter->serialize(os); + m->adapter->serialize(os); } 
cv::MediaFrame::View::View(Ptrs&& ptrs, Strides&& strs, Callback &&cb) diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index c33129a0f1..22f73e553c 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -886,25 +886,6 @@ static void run_arithm_s(DST out[], const SRC in[], int width, int chan, CV_Error(cv::Error::StsBadArg, "unsupported number of channels"); } -template -static void run_absdiffc(Buffer &dst, const View &src, const float scalar[]) -{ - const auto *in = src.InLine(0); - auto *out = dst.OutLine(); - - int width = dst.length(); - int chan = dst.meta().chan; - const int length = width * chan; - - int w = 0; -#if CV_SIMD - w = absdiffc_simd(in, scalar, out, length, chan); -#endif - - for (; w < length; ++w) - out[w] = absdiff(in[w], scalar[w%chan]); -} - template CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float scalar[], Arithm arithm, float scale=1) @@ -950,11 +931,6 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca out[chan * w + c] = mul(in[chan * w + c], scalar[c], scale); break; } - case ARITHM_DIVIDE: - for (int w=0; w < width; w++) - for (int c=0; c < chan; c++) - out[chan*w + c] = div(in[chan*w + c], scalar[c], scale); - break; default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); } } @@ -992,6 +968,14 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A } } +CV_ALWAYS_INLINE void setScratchSize(Buffer& scratch, const int buflen) +{ + cv::Size bufsize(buflen, 1); + GMatDesc bufdesc = { CV_32F, 1, bufsize }; + Buffer buffer(bufdesc); + scratch = std::move(buffer); +} + CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch) { #if CV_SIMD @@ -1012,25 +996,47 @@ CV_ALWAYS_INLINE void initScratchBuffer(Buffer& scratch) #else constexpr int buflen = 4; #endif - cv::Size bufsize(buflen, 1); - GMatDesc bufdesc = { 
CV_32F, 1, bufsize }; - Buffer buffer(bufdesc); - scratch = std::move(buffer); + setScratchSize(scratch, buflen); +} + +CV_ALWAYS_INLINE void scalar_to_scratch(const cv::Scalar& scalar, + float scratch[], const int length, const int chan) +{ + for (int i = 0; i < length; ++i) + scratch[i] = static_cast(scalar[i % chan]); +} + +template +CV_ALWAYS_INLINE void run_absdiffc(Buffer& dst, const View& src, const float scalar[]) +{ + const auto* in = src.InLine(0); + auto* out = dst.OutLine(); + + int width = dst.length(); + int chan = dst.meta().chan; + const int length = width * chan; + + int w = 0; +#if CV_SIMD + w = absdiffc_simd(in, scalar, out, length, chan); +#endif + + for (; w < length; ++w) + out[w] = absdiff(in[w], scalar[w % chan]); } GAPI_FLUID_KERNEL(GFluidAbsDiffC, cv::gapi::core::GAbsDiffC, true) { static const int Window = 1; - static void run(const View &src, const cv::Scalar& _scalar, Buffer &dst, Buffer& scratch) + static void run(const View& src, const cv::Scalar& _scalar, Buffer& dst, Buffer& scratch) { if (dst.y() == 0) { const int chan = src.meta().chan; - float* sc = scratch.OutLine(); + float* _scratch = scratch.OutLine(); - for (int i = 0; i < scratch.length(); ++i) - sc[i] = static_cast(_scalar[i % chan]); + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); } const float* scalar = scratch.OutLine(); @@ -1058,17 +1064,16 @@ GAPI_FLUID_KERNEL(GFluidAddC, cv::gapi::core::GAddC, true) { static const int Window = 1; - static void run(const View &src, const cv::Scalar &_scalar, int /*dtype*/, Buffer &dst, Buffer &scratch) + static void run(const View& src, const cv::Scalar& _scalar, int /*dtype*/, Buffer& dst, Buffer& scratch) { GAPI_Assert(src.meta().chan <= 4); if (dst.y() == 0) { const int chan = src.meta().chan; - float* sc = scratch.OutLine(); + float* _scratch = scratch.OutLine(); - for (int i = 0; i < scratch.length(); ++i) - sc[i] = static_cast(_scalar[i % chan]); + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); } 
const float* scalar = scratch.OutLine(); @@ -1115,10 +1120,9 @@ GAPI_FLUID_KERNEL(GFluidSubC, cv::gapi::core::GSubC, true) if (dst.y() == 0) { const int chan = src.meta().chan; - float* sc = scratch.OutLine(); + float* _scratch = scratch.OutLine(); - for (int i = 0; i < scratch.length(); ++i) - sc[i] = static_cast(_scalar[i % chan]); + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); } const float* scalar = scratch.OutLine(); @@ -1165,10 +1169,9 @@ GAPI_FLUID_KERNEL(GFluidSubRC, cv::gapi::core::GSubRC, true) if (dst.y() == 0) { const int chan = src.meta().chan; - float* sc = scratch.OutLine(); + float* _scratch = scratch.OutLine(); - for (int i = 0; i < scratch.length(); ++i) - sc[i] = static_cast(_scalar[i % chan]); + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); } const float* scalar = scratch.OutLine(); @@ -1216,10 +1219,9 @@ GAPI_FLUID_KERNEL(GFluidMulC, cv::gapi::core::GMulC, true) if (dst.y() == 0) { const int chan = src.meta().chan; - float* sc = scratch.OutLine(); + float* _scratch = scratch.OutLine(); - for (int i = 0; i < scratch.length(); ++i) - sc[i] = static_cast(_scalar[i % chan]); + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); } const float* scalar = scratch.OutLine(); const float scale = 1.0; @@ -1259,7 +1261,7 @@ GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, true) { static const int Window = 1; - static void run(const View &src, double _scalar, int /*dtype*/, Buffer &dst, Buffer& scratch) + static void run(const View& src, double _scalar, int /*dtype*/, Buffer& dst, Buffer& scratch) { GAPI_Assert(src.meta().chan <= 4); @@ -1295,32 +1297,109 @@ GAPI_FLUID_KERNEL(GFluidMulCOld, cv::gapi::core::GMulCOld, true) } }; -GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, false) +template +CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch, + float scale) +{ + const auto* in = src.InLine(0); + auto* out = dst.OutLine(); + const float* scalar = scratch.OutLine(); + + int 
width = dst.length(); + int chan = dst.meta().chan; + const int length = width * chan; + + int w = 0; +#if CV_SIMD + int scratch_length = scratch.length(); + int indicator_offset = scratch_length - 1; + const int set_mask_indicator = static_cast(*(scratch.OutLine() + (indicator_offset))); + + w = divc_simd(in, scalar, out, length, chan, scale, set_mask_indicator); +#endif + + for (; w < length; ++w) + out[w] = div(in[w], scalar[w % chan], scale); +} + +GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) { static const int Window = 1; - static void run(const View &src, const cv::Scalar &_scalar, double _scale, int /*dtype*/, - Buffer &dst) + static void run(const View& src, const cv::Scalar& _scalar, double _scale, int /*dtype*/, + Buffer& dst, Buffer& scratch) { - const float scalar[4] = { - static_cast(_scalar[0]), - static_cast(_scalar[1]), - static_cast(_scalar[2]), - static_cast(_scalar[3]) - }; - const float scale = static_cast(_scale); + GAPI_Assert(src.meta().chan <= 4); + + if (dst.y() == 0) + { + const int chan = src.meta().chan; + float* _scratch = scratch.OutLine(); + int scratch_length = scratch.length(); + + scalar_to_scratch(_scalar, _scratch, scratch_length - 1, chan); + + _scratch[scratch_length - 1] = 0.0; + for (int j = 0; j < chan; ++j) + { + if (std::fabs(static_cast(_scalar[j])) <= FLT_EPSILON) + { + _scratch[scratch_length - 1] = 1.0; + break; + } + } + } + + float scale = static_cast(_scale); // DST SRC OP __VA_ARGS__ - UNARY_(uchar , uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , float, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( short, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, uchar , run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, short, run_arithm_s, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, float, run_arithm_s, dst, src, 
scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, uchar, run_divc, dst, src, scratch, scale); + UNARY_(uchar, ushort, run_divc, dst, src, scratch, scale); + UNARY_(uchar, short, run_divc, dst, src, scratch, scale); + UNARY_(uchar, float, run_divc, dst, src, scratch, scale); + UNARY_(ushort, ushort, run_divc, dst, src, scratch, scale); + UNARY_(ushort, uchar, run_divc, dst, src, scratch, scale); + UNARY_(ushort, short, run_divc, dst, src, scratch, scale); + UNARY_(ushort, float, run_divc, dst, src, scratch, scale); + UNARY_(short, short, run_divc, dst, src, scratch, scale); + UNARY_(short, ushort, run_divc, dst, src, scratch, scale); + UNARY_(short, uchar, run_divc, dst, src, scratch, scale); + UNARY_(short, float, run_divc, dst, src, scratch, scale); + UNARY_(float, uchar, run_divc, dst, src, scratch, scale); + UNARY_(float, short, run_divc, dst, src, scratch, scale); + UNARY_(float, ushort, run_divc, dst, src, scratch, scale); + UNARY_(float, float, run_divc, dst, src, scratch, scale); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } + + static void initScratch(const GMatDesc&, const GScalarDesc&, double, int, Buffer& scratch) + { +#if CV_SIMD + // 512 bits / 32 bits = 16 elements of float32 a AVX512 SIMD vector can contain. + constexpr int maxNlanes = 16; + + // +2 is offset for 3-channel case. + // Offset is need to right load coefficients from scalar array to SIMD vectors for 3-channel case. 
+ // Scalar array looks like: scalar[] = {C1, C2, C3, C1, C2, C3, ...} + // The first scalar SIMD vector should looks like: + // C1 C2 C3 C1 + // The second: + // C2 C3 C1 C2 + // The third: + // C3 C1 C2 C3 + constexpr int offset = 2; + constexpr int zero_scalar_elem_indicator = 1; + constexpr int buflen = maxNlanes + offset + zero_scalar_elem_indicator; +#else + constexpr int buflen = 4; +#endif + setScratchSize(scratch, buflen); + } + + static void resetScratch(Buffer& /*scratch*/) + { + } }; GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false) @@ -2509,26 +2588,18 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3) { - const auto *in = src.InLine(0); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); GAPI_Assert(3 == src.meta().chan); int width = src.length(); + int w = 0; - int w = 0; // cycle counter - - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c; - v_load_deinterleave(&in[3*w], a, b, c); - v_store(&out1[w], a); - v_store(&out2[w], b); - v_store(&out3[w], c); - } - #endif +#if CV_SIMD + w = split3_simd(in, out1, out2, out3, width); +#endif for (; w < width; w++) { @@ -2545,7 +2616,7 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4) { - const auto *in = src.InLine(0); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); @@ -2553,19 +2624,10 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) GAPI_Assert(4 == src.meta().chan); int width = src.length(); + int w = 0; - int w = 0; // cycle counter - - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c, d; - v_load_deinterleave(&in[4*w], a, b, c, d); - v_store(&out1[w], a); - v_store(&out2[w], b); - v_store(&out3[w], 
c); - v_store(&out4[w], d); - } + #if CV_SIMD + w = split4_simd(in, out1, out2, out3, out4, width); #endif for (; w < width; w++) @@ -2591,18 +2653,10 @@ GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false) GAPI_Assert(3 == dst.meta().chan); int width = dst.length(); + int w = 0; - int w = 0; // cycle counter - - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c; - a = v_load(&in1[w]); - b = v_load(&in2[w]); - c = v_load(&in3[w]); - v_store_interleave(&out[3*w], a, b, c); - } + #if CV_SIMD + w = merge3_simd(in1, in2, in3, out, width); #endif for (; w < width; w++) @@ -2632,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false) int w = 0; // cycle counter - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c, d; - a = v_load(&in1[w]); - b = v_load(&in2[w]); - c = v_load(&in3[w]); - d = v_load(&in4[w]); - v_store_interleave(&out[4*w], a, b, c, d); - } + #if CV_SIMD + w = merge4_simd(in1, in2, in3, in4, out, width); #endif for (; w < width; w++) diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 348c00ed12..9afac9ceb4 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -192,6 +192,34 @@ MULC_SIMD(float, float) #undef MULC_SIMD +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ + const int set_mask_flag) \ +{ \ + CV_CPU_DISPATCH(divc_simd, (in, scalar, out, length, chan, scale, set_mask_flag), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +DIVC_SIMD(uchar, uchar) +DIVC_SIMD(ushort, uchar) +DIVC_SIMD(short, uchar) +DIVC_SIMD(float, uchar) +DIVC_SIMD(short, short) +DIVC_SIMD(ushort, short) +DIVC_SIMD(uchar, short) +DIVC_SIMD(float, short) +DIVC_SIMD(ushort, ushort) +DIVC_SIMD(uchar, ushort) +DIVC_SIMD(short, ushort) 
+DIVC_SIMD(float, ushort) +DIVC_SIMD(uchar, float) +DIVC_SIMD(ushort, float) +DIVC_SIMD(short, float) +DIVC_SIMD(float, float) + +#undef DIVC_SIMD + #define ABSDIFFC_SIMD(SRC) \ int absdiffc_simd(const SRC in[], const float scalar[], SRC out[], \ const int length, const int chan) \ @@ -207,6 +235,34 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width) +{ + CV_CPU_DISPATCH(split3_simd, (in, out1, out2, out3, width), + CV_CPU_DISPATCH_MODES_ALL); +} + +int split4_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], uchar out4[], const int width) +{ + CV_CPU_DISPATCH(split4_simd, (in, out1, out2, out3, out4, width), + CV_CPU_DISPATCH_MODES_ALL); +} + +int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], + uchar out[], const int width) +{ + CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width), + CV_CPU_DISPATCH_MODES_ALL); +} + +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width) +{ + CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width), + CV_CPU_DISPATCH_MODES_ALL); +} + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 6023a879d9..868923932d 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -152,6 +152,30 @@ MULC_SIMD(float, float) #undef MULC_SIMD +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ + const int set_mask_flag); + +DIVC_SIMD(uchar, uchar) +DIVC_SIMD(ushort, uchar) +DIVC_SIMD(short, uchar) +DIVC_SIMD(float, uchar) +DIVC_SIMD(short, short) +DIVC_SIMD(ushort, short) +DIVC_SIMD(uchar, short) +DIVC_SIMD(float, short) +DIVC_SIMD(ushort, ushort) 
+DIVC_SIMD(uchar, ushort) +DIVC_SIMD(short, ushort) +DIVC_SIMD(float, ushort) +DIVC_SIMD(uchar, float) +DIVC_SIMD(ushort, float) +DIVC_SIMD(short, float) +DIVC_SIMD(float, float) + +#undef DIVC_SIMD + #define ABSDIFFC_SIMD(T) \ int absdiffc_simd(const T in[], const float scalar[], T out[], \ const int length, const int chan); @@ -163,6 +187,18 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width); + +int split4_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], uchar out4[], const int width); + +int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], + uchar out[], const int width); + +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width); + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index 38c47072f4..2424a57677 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -173,6 +173,30 @@ MULC_SIMD(float, float) #undef MULC_SIMD +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ + const int set_mask_flag); + +DIVC_SIMD(uchar, uchar) +DIVC_SIMD(ushort, uchar) +DIVC_SIMD(short, uchar) +DIVC_SIMD(float, uchar) +DIVC_SIMD(short, short) +DIVC_SIMD(ushort, short) +DIVC_SIMD(uchar, short) +DIVC_SIMD(float, short) +DIVC_SIMD(ushort, ushort) +DIVC_SIMD(uchar, ushort) +DIVC_SIMD(short, ushort) +DIVC_SIMD(float, ushort) +DIVC_SIMD(uchar, float) +DIVC_SIMD(ushort, float) +DIVC_SIMD(short, float) +DIVC_SIMD(float, float) + +#undef DIVC_SIMD + #define ABSDIFFC_SIMD(T) \ int absdiffc_simd(const T in[], const float scalar[], T out[], \ const int length, const int chan); @@ 
-184,6 +208,18 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width); + +int split4_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], uchar out4[], const int width); + +int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], + uchar out[], const int width); + +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width); + #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY struct scale_tag {}; @@ -935,6 +971,7 @@ struct add_tag {}; struct sub_tag {}; struct subr_tag {}; struct mul_tag {}; +struct div_tag {}; struct absdiff_tag {}; CV_ALWAYS_INLINE void arithmOpScalar_pack_store_c3(short* outx, const v_int32& c1, @@ -979,6 +1016,21 @@ CV_ALWAYS_INLINE v_float32 oper(mul_tag, const v_float32& a, const v_float32& sc return a * sc; } +CV_ALWAYS_INLINE v_float32 oper_scaled(mul_tag, const v_float32& a, const v_float32& v_scalar, const v_float32& v_scale) +{ + return v_scale * a * v_scalar; +} + +CV_ALWAYS_INLINE v_float32 oper(div_tag, const v_float32& a, const v_float32& sc) +{ + return a / sc; +} + +CV_ALWAYS_INLINE v_float32 oper_scaled(div_tag, const v_float32& a, const v_float32& v_scalar, const v_float32& v_scale) +{ + return a*v_scale / v_scalar; +} + CV_ALWAYS_INLINE v_float32 oper(absdiff_tag, const v_float32& a, const v_float32& sc) { return v_absdiff(a, sc); @@ -1288,16 +1340,17 @@ SUBRC_SIMD(float, float) //------------------------- // -// Fluid kernels: MulC +// Fluid kernels: MulC, DivC // //------------------------- -template +template CV_ALWAYS_INLINE typename std::enable_if::value || std::is_same::value, void>::type -mulc_scale_simd_c3_impl(const SRC* inx, DST* outx, const v_float32& s1, const v_float32& s2, - const v_float32& s3, const v_float32& scale, const int nlanes) +arithmOpScalarScaled_simd_c3_impl(oper_tag op, SRC* inx, DST* outx, const v_float32& s1, + const 
v_float32& s2, const v_float32& s3, + const v_float32& v_scale, const int nlanes) { v_float32 a1 = vg_load_f32(inx); v_float32 a2 = vg_load_f32(&inx[nlanes / 2]); @@ -1306,62 +1359,64 @@ mulc_scale_simd_c3_impl(const SRC* inx, DST* outx, const v_float32& s1, const v_ v_float32 a5 = vg_load_f32(&inx[2 * nlanes]); v_float32 a6 = vg_load_f32(&inx[5 * nlanes / 2]); - arithmOpScalar_pack_store_c3(outx, v_round(scale*a1*s1), - v_round(scale*a2*s2), - v_round(scale*a3*s3), - v_round(scale*a4*s1), - v_round(scale*a5*s2), - v_round(scale*a6*s3)); + arithmOpScalar_pack_store_c3(outx, v_round(oper_scaled(op, a1, s1, v_scale)), + v_round(oper_scaled(op, a2, s2, v_scale)), + v_round(oper_scaled(op, a3, s3, v_scale)), + v_round(oper_scaled(op, a4, s1, v_scale)), + v_round(oper_scaled(op, a5, s2, v_scale)), + v_round(oper_scaled(op, a6, s3, v_scale))); } //------------------------------------------------------------------------------------------------- -template -CV_ALWAYS_INLINE void mulc_scale_simd_c3_impl(const SRC* inx, uchar* outx, - const v_float32& s1, const v_float32& s2, - const v_float32& s3, const v_float32& scale, const int nlanes) +template +CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_c3_impl(oper_tag op, const SRC* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int nlanes) { vx_store(outx, - v_pack_u(v_pack(v_round(scale * vg_load_f32(inx)* s1), - v_round(scale * vg_load_f32(&inx[nlanes/4])* s2)), - v_pack(v_round(scale * vg_load_f32(&inx[nlanes/2])* s3), - v_round(scale * vg_load_f32(&inx[3*nlanes/4])* s1)))); + v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(inx), s1, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[nlanes/4]), s2, v_scale))), + v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[nlanes/2]), s3, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[3*nlanes/4]), s1, v_scale))))); vx_store(&outx[nlanes], - v_pack_u(v_pack(v_round(scale * vg_load_f32(&inx[nlanes])* s2), 
- v_round(scale * vg_load_f32(&inx[5*nlanes/4])* s3)), - v_pack(v_round(scale * vg_load_f32(&inx[3*nlanes/2])* s1), - v_round(scale * vg_load_f32(&inx[7*nlanes/4])* s2)))); + v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[nlanes]), s2, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[5*nlanes/4]), s3, v_scale))), + v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[3*nlanes/2]), s1, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[7*nlanes/4]), s2, v_scale))))); vx_store(&outx[2 * nlanes], - v_pack_u(v_pack(v_round(scale * vg_load_f32(&inx[2*nlanes])* s3), - v_round(scale * vg_load_f32(&inx[9*nlanes/4])* s1)), - v_pack(v_round(scale * vg_load_f32(&inx[5*nlanes/2])* s2), - v_round(scale * vg_load_f32(&inx[11*nlanes/4])* s3)))); + v_pack_u(v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[2*nlanes]), s3, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[9*nlanes/4]), s1, v_scale))), + v_pack(v_round(oper_scaled(op, vg_load_f32(&inx[5*nlanes/2]), s2, v_scale)), + v_round(oper_scaled(op, vg_load_f32(&inx[11*nlanes/4]), s3, v_scale))))); } //------------------------------------------------------------------------------------------------- -template -CV_ALWAYS_INLINE void mulc_scale_simd_c3_impl(const SRC* in, float* out, - const v_float32& s1, const v_float32& s2, - const v_float32& s3, const v_float32& scale, const int nlanes) +template +CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_c3_impl(oper_tag op, const SRC* in, float* out, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int nlanes) { v_float32 a1 = vg_load_f32(in); v_float32 a2 = vg_load_f32(&in[nlanes]); v_float32 a3 = vg_load_f32(&in[2*nlanes]); - vx_store(out, scale * a1* s1); - vx_store(&out[nlanes], scale * a2* s2); - vx_store(&out[2*nlanes], scale * a3* s3); + vx_store(out, oper_scaled(op, a1, s1, v_scale)); + vx_store(&out[nlanes], oper_scaled(op, a2, s2, v_scale)); + vx_store(&out[2*nlanes], oper_scaled(op, a3, s3, v_scale)); } 
//------------------------------------------------------------------------------------------------- -template -CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[], - const float scalar[], DST out[], - const int length, const float _scale) +template +CV_ALWAYS_INLINE int arithmOpScalarScaled_simd_c3(oper_tag op, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) { constexpr int chan = 3; constexpr int nlanes = vector_type_of_t::nlanes; @@ -1370,7 +1425,7 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[], if (length < lanes) return 0; - v_float32 scale = vx_setall_f32(_scale); + v_float32 v_scale = vx_setall_f32(scale); v_float32 s1 = vx_load(scalar); #if CV_SIMD_WIDTH == 32 @@ -1386,7 +1441,7 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[], { for (; x <= length - lanes; x += lanes) { - mulc_scale_simd_c3_impl(&in[x], &out[x], s1, s2, s3, scale, nlanes); + arithmOpScalarScaled_simd_c3_impl(op, &in[x], &out[x], s1, s2, s3, v_scale, nlanes); } if (x < length) @@ -1401,70 +1456,70 @@ CV_ALWAYS_INLINE int mulc_scale_simd_c3(const SRC in[], //------------------------------------------------------------------------------------------------- -template +template CV_ALWAYS_INLINE typename std::enable_if<(std::is_same::value || std::is_same::value), void>::type -mulc_scale_simd_common_impl(const SRC* inx, DST* outx, - const v_float32& sc, const v_float32& scale, - const int nlanes) +arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx, DST* outx, + const v_float32& v_scalar, const v_float32& v_scale, + const int nlanes) { v_float32 a1 = vg_load_f32(inx); v_float32 a2 = vg_load_f32(&inx[nlanes/2]); - v_store_i16(outx, v_round(scale * a1* sc), v_round(scale * a2* sc)); + v_store_i16(outx, v_round(oper_scaled(op, a1, v_scalar, v_scale)), v_round(oper_scaled(op, a2, v_scalar, v_scale))); } //------------------------------------------------------------------------------------------------- -template 
-CV_ALWAYS_INLINE void mulc_scale_simd_common_impl(const SRC* inx, - uchar* outx, const v_float32& sc, - const v_float32& scale, const int nlanes) +template +CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx, + uchar* outx, const v_float32& v_scalar, + const v_float32& v_scale, const int nlanes) { v_float32 a1 = vg_load_f32(inx); v_float32 a2 = vg_load_f32(&inx[nlanes/4]); v_float32 a3 = vg_load_f32(&inx[nlanes/2]); v_float32 a4 = vg_load_f32(&inx[3 * nlanes/4]); - vx_store(outx, v_pack_u(v_pack(v_round(scale * a1* sc), - v_round(scale * a2* sc)), - v_pack(v_round(scale * a3* sc), - v_round(scale * a4* sc)))); + vx_store(outx, v_pack_u(v_pack(v_round(oper_scaled(op, a1, v_scalar, v_scale)), + v_round(oper_scaled(op, a2, v_scalar, v_scale))), + v_pack(v_round(oper_scaled(op, a3, v_scalar, v_scale)), + v_round(oper_scaled(op, a4, v_scalar, v_scale))))); } //------------------------------------------------------------------------------------------------- -template -CV_ALWAYS_INLINE void mulc_scale_simd_common_impl(const SRC* inx, - float* outx, const v_float32& sc, - const v_float32& scale, const int) +template +CV_ALWAYS_INLINE void arithmOpScalarScaled_simd_common_impl(oper_tag op, const SRC* inx, + float* outx, const v_float32& v_scalar, + const v_float32& v_scale, const int) { - v_float32 a1 = vg_load_f32(inx); - vx_store(outx, scale * a1* sc); + v_float32 a = vg_load_f32(inx); + vx_store(outx, oper_scaled(op, a, v_scalar, v_scale)); } //------------------------------------------------------------------------------------------------- -template -CV_ALWAYS_INLINE int mulc_scale_simd_common(const SRC in[], - const float scalar[], DST out[], - const int length, const float _scale) +template +CV_ALWAYS_INLINE int arithmOpScalarScaled_simd_common(oper_tag op, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) { constexpr int nlanes = vector_type_of_t::nlanes; if (length < nlanes) return 0; - 
v_float32 _scalar = vx_load(scalar); - v_float32 scale = vx_setall_f32(_scale); + v_float32 v_scalar = vx_load(scalar); + v_float32 v_scale = vx_setall_f32(scale); int x = 0; for (;;) { for (; x <= length - nlanes; x += nlanes) { - mulc_scale_simd_common_impl(&in[x], &out[x], _scalar, scale, nlanes); + arithmOpScalarScaled_simd_common_impl(op, &in[x], &out[x], v_scalar, v_scale, nlanes); } if (x < length) @@ -1477,6 +1532,8 @@ CV_ALWAYS_INLINE int mulc_scale_simd_common(const SRC in[], return x; } +//------------------------------------------------------------------------------------------------- + #define MULC_SIMD(SRC, DST) \ int mulc_simd(const SRC in[], const float scalar[], DST out[], \ const int length, const int chan, const float scale) \ @@ -1495,7 +1552,8 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[], \ } \ else \ { \ - return mulc_scale_simd_common(in, scalar, out, length, scale); \ + return arithmOpScalarScaled_simd_common(op_t, in, scalar, out, \ + length, scale); \ } \ } \ case 3: \ @@ -1507,7 +1565,8 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[], \ } \ else \ { \ - return mulc_scale_simd_c3(in, scalar, out, length, scale); \ + return arithmOpScalarScaled_simd_c3(op_t, in, scalar, out, \ + length, scale); \ } \ } \ default: \ @@ -1536,6 +1595,355 @@ MULC_SIMD(float, float) #undef MULC_SIMD +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if<(std::is_same::value || + std::is_same::value), int>::type +divc_simd_common_impl(scale_tag_t s_tag, const SRC in[], DST out[], + const v_float32& v_scalar, const v_float32& v_scale, + const int length) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_float32 v_zero = vx_setzero_f32(); + v_float32 v_mask = (v_scalar == v_zero); + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_float32 a1 = vg_load_f32(&in[x]); + v_float32 a2 = 
vg_load_f32(&in[x + nlanes/2]); + + v_store_i16(&out[x], v_round(v_select(v_mask, v_zero, div_op(s_tag, a1, v_scalar, v_scale))), + v_round(v_select(v_mask, v_zero, div_op(s_tag, a2, v_scalar, v_scale)))); + } + + if (x < length) + { + x = length - nlanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_simd_common_impl(scale_tag_t s_tag, const SRC in[], + uchar out[], const v_float32& v_scalar, + const v_float32& v_scale, const int length) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 v_zero = vx_setzero_f32(); + v_float32 v_mask = (v_scalar == v_zero); + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_float32 a1 = vg_load_f32(&in[x]); + v_float32 a2 = vg_load_f32(&in[x + nlanes/4]); + v_float32 a3 = vg_load_f32(&in[x + nlanes/2]); + v_float32 a4 = vg_load_f32(&in[x + 3 * nlanes/4]); + + vx_store(&out[x], v_pack_u(v_pack(v_round(v_select(v_mask, v_zero, div_op(s_tag, a1, v_scalar, v_scale))), + v_round(v_select(v_mask, v_zero, div_op(s_tag, a2, v_scalar, v_scale)))), + v_pack(v_round(v_select(v_mask, v_zero, div_op(s_tag, a3, v_scalar, v_scale))), + v_round(v_select(v_mask, v_zero, div_op(s_tag, a4, v_scalar, v_scale)))))); + } + + if (x < length) + { + x = length - nlanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_simd_common_impl(scale_tag_t s_tag, const SRC in[], + float out[], const v_float32& v_scalar, + const v_float32& v_scale, const int length) +{ + constexpr int nlanes = v_float32::nlanes; + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + v_float32 a1 = vg_load_f32(&in[x]); + vx_store(&out[x], div_op(s_tag, a1, v_scalar, v_scale)); + } + + if (x < length) 
+ { + x = length - nlanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_mask_simd_common(scale_tag_t tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + if (length < nlanes) + return 0; + + v_float32 v_scalar = vx_load(scalar); + v_float32 v_scale = vx_setall_f32(scale); + return divc_simd_common_impl(tag, in, out, v_scalar, v_scale, length); +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, int>::type +divc_simd_c3_impl(scale_tag_t s_tag, SRC in[], DST out[], const v_float32& s1, + const v_float32& s2, const v_float32& s3, + const v_float32& v_scale, const int length, + const int nlanes, const int lanes) +{ + v_float32 v_zero = vx_setzero_f32(); + v_float32 v_mask1 = (s1 == v_zero); + v_float32 v_mask2 = (s2 == v_zero); + v_float32 v_mask3 = (s3 == v_zero); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + v_float32 a1 = vg_load_f32(&in[x]); + v_float32 a2 = vg_load_f32(&in[x + nlanes / 2]); + v_float32 a3 = vg_load_f32(&in[x + nlanes]); + v_float32 a4 = vg_load_f32(&in[x + 3 * nlanes / 2]); + v_float32 a5 = vg_load_f32(&in[x + 2 * nlanes]); + v_float32 a6 = vg_load_f32(&in[x + 5 * nlanes / 2]); + + arithmOpScalar_pack_store_c3(&out[x], v_round(v_select(v_mask1, v_zero, div_op(s_tag, a1, s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))), + v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, 
s3, v_scale)))); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, uchar* out, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int length, const int nlanes, const int lanes) +{ + v_float32 v_zero = vx_setzero_f32(); + v_float32 v_mask1 = (s1 == v_zero); + v_float32 v_mask2 = (s2 == v_zero); + v_float32 v_mask3 = (s3 == v_zero); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + vx_store(&out[x], + v_pack_u(v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x]), s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes/4]), s2, v_scale)))), + v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes/2]), s3, v_scale))), + v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 3*nlanes/4]), s1, v_scale)))))); + + vx_store(&out[x + nlanes], + v_pack_u(v_pack(v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + nlanes]), s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + 5*nlanes/4]), s3, v_scale)))), + v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 3*nlanes/2]), s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + 7*nlanes/4]), s2, v_scale)))))); + + vx_store(&out[x + 2 * nlanes], + v_pack_u(v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, vg_load_f32(&in[x + 2*nlanes]), s3, v_scale))), + v_round(v_select(v_mask1, v_zero, div_op(s_tag, vg_load_f32(&in[x + 9*nlanes/4]), s1, v_scale)))), + v_pack(v_round(v_select(v_mask2, v_zero, div_op(s_tag, vg_load_f32(&in[x + 5*nlanes/2]), s2, v_scale))), + v_round(v_select(v_mask3, v_zero, 
div_op(s_tag, vg_load_f32(&in[x + 11*nlanes/4]), s3, v_scale)))))); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, float* out, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, const int length, + const int nlanes, const int lanes) +{ + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + v_float32 a1 = vg_load_f32(&in[x]); + v_float32 a2 = vg_load_f32(&in[x + nlanes]); + v_float32 a3 = vg_load_f32(&in[x + 2*nlanes]); + + vx_store(&out[x], div_op(s_tag, a1, s1, v_scale)); + vx_store(&out[x + nlanes], div_op(s_tag, a2, s2, v_scale)); + vx_store(&out[x + 2*nlanes], div_op(s_tag, a3, s3, v_scale)); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divc_mask_simd_c3(scale_tag_t s_tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int chan = 3; + constexpr int nlanes = vector_type_of_t::nlanes; + constexpr int lanes = chan * nlanes; + + if (length < lanes) + return 0; + + v_float32 v_scale = vx_setall_f32(scale); + + v_float32 s1 = vx_load(scalar); +#if CV_SIMD_WIDTH == 32 + v_float32 s2 = vx_load(&scalar[2]); + v_float32 s3 = vx_load(&scalar[1]); +#else + v_float32 s2 = vx_load(&scalar[1]); + v_float32 s3 = vx_load(&scalar[2]); +#endif + return divc_simd_c3_impl(s_tag, in, out, s1, s2, s3, v_scale, length, nlanes, lanes); +} + +//------------------------------------------------------------------------------------------------- + +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], 
const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ + const int set_mask_flag) \ +{ \ + switch (chan) \ + { \ + case 1: \ + case 2: \ + case 4: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + if (set_mask_flag == 1) \ + return divc_mask_simd_common(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + else \ + return arithmOpScalar_simd_common(div_tag{}, in, scalar, \ + out, length); \ + } \ + else \ + { if (set_mask_flag == 1) \ + return divc_mask_simd_common(scale_tag{}, in, scalar, \ + out, length, scale); \ + else \ + return arithmOpScalarScaled_simd_common(div_tag{}, in, scalar, \ + out, length, scale); \ + } \ + } \ + case 3: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + if (set_mask_flag == 1) \ + return divc_mask_simd_c3(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + else \ + return arithmOpScalar_simd_c3(div_tag{}, in, scalar, \ + out, length); \ + } \ + else \ + { \ + if (set_mask_flag == 1) \ + return divc_mask_simd_c3(scale_tag{}, in, scalar, \ + out, length, scale); \ + else \ + return arithmOpScalarScaled_simd_c3(div_tag{}, in, scalar, out,\ + length, scale); \ + } \ + } \ + default: \ + GAPI_Assert(chan <= 4); \ + break; \ + } \ + return 0; \ +} + +DIVC_SIMD(uchar, uchar) +DIVC_SIMD(ushort, uchar) +DIVC_SIMD(short, uchar) +DIVC_SIMD(float, uchar) +DIVC_SIMD(short, short) +DIVC_SIMD(ushort, short) +DIVC_SIMD(uchar, short) +DIVC_SIMD(float, short) +DIVC_SIMD(ushort, ushort) +DIVC_SIMD(uchar, ushort) +DIVC_SIMD(short, ushort) +DIVC_SIMD(float, ushort) +DIVC_SIMD(uchar, float) +DIVC_SIMD(ushort, float) +DIVC_SIMD(short, float) +DIVC_SIMD(float, float) + +#undef DIVC_SIMD + //------------------------- // // Fluid kernels: AbsDiffC @@ -1544,7 +1952,7 @@ MULC_SIMD(float, float) #define ABSDIFFC_SIMD(SRC) \ int absdiffc_simd(const SRC in[], const float scalar[], SRC out[], \ - const int length, const int chan) \ + const int length, const int chan) \ { \ switch (chan) \ { 
\ @@ -1568,6 +1976,144 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +//------------------------- +// +// Fluid kernels: Split3 +// +//------------------------- + +int split3_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], const int width) +{ + constexpr int nlanes = v_uint8::nlanes; + if (width < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= width - nlanes; x += nlanes) + { + v_uint8 a, b, c; + v_load_deinterleave(&in[3 * x], a, b, c); + vx_store(&out1[x], a); + vx_store(&out2[x], b); + vx_store(&out3[x], c); + } + if (x < width) + { + x = width - nlanes; + continue; + } + break; + } + return x; +} + +//------------------------- +// +// Fluid kernels: Split4 +// +//------------------------- + +int split4_simd(const uchar in[], uchar out1[], uchar out2[], + uchar out3[], uchar out4[], const int width) +{ + constexpr int nlanes = v_uint8::nlanes; + if (width < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= width - nlanes; x += nlanes) + { + v_uint8 a, b, c, d; + v_load_deinterleave(&in[4 * x], a, b, c, d); + vx_store(&out1[x], a); + vx_store(&out2[x], b); + vx_store(&out3[x], c); + vx_store(&out4[x], d); + } + if (x < width) + { + x = width - nlanes; + continue; + } + break; + } + return x; +} + +//------------------------- +// +// Fluid kernels: Merge3 +// +//------------------------- + +int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], + uchar out[], const int width) +{ + constexpr int nlanes = v_uint8::nlanes; + if (width < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= width - nlanes; x += nlanes) + { + v_uint8 a, b, c; + a = vx_load(&in1[x]); + b = vx_load(&in2[x]); + c = vx_load(&in3[x]); + v_store_interleave(&out[3 * x], a, b, c); + } + if (x < width) + { + x = width - nlanes; + continue; + } + break; + } + return x; +} + +//------------------------- +// +// Fluid kernels: Merge4 +// +//------------------------- + +int merge4_simd(const uchar in1[], const uchar in2[], 
const uchar in3[], + const uchar in4[], uchar out[], const int width) +{ + constexpr int nlanes = v_uint8::nlanes; + if (width < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= width - nlanes; x += nlanes) + { + v_uint8 a, b, c, d; + a = vx_load(&in1[x]); + b = vx_load(&in2[x]); + c = vx_load(&in3[x]); + d = vx_load(&in4[x]); + v_store_interleave(&out[4 * x], a, b, c, d); + } + if (x < width) + { + x = width - nlanes; + continue; + } + break; + } + return x; +} + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END diff --git a/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp b/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp index 02fff30977..3f2012807e 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_simd_sse41.hpp @@ -28,7 +28,7 @@ namespace cv { namespace gapi { namespace fluid { -namespace sse42 { +namespace sse41 { CV_ALWAYS_INLINE void v_gather_pixel_map(v_uint8x16& vec, const uchar src[], const short* index, const int pos) { @@ -216,8 +216,8 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[], const int lpi) { bool xRatioEq = inSz.width == outSz.width; bool yRatioEq = inSz.height == outSz.height; - constexpr int nlanes = 16; - constexpr int half_nlanes = 16 / 2; + constexpr int nlanes = 16; // number of 8-bit integers that fit into a 128-bit SIMD vector. 
+ constexpr int half_nlanes = nlanes / 2; constexpr int chanNum = 3; if (!xRatioEq && !yRatioEq) { @@ -235,7 +235,7 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[], for (int w = 0; w < inSz.width * chanNum; ) { for (; w <= inSz.width * chanNum - half_nlanes && w >= 0; w += half_nlanes) { -#ifdef __i386__ +#if defined(__i386__) || defined(_M_IX86) __m128i val0lo = _mm_castpd_si128(_mm_loadh_pd( _mm_load_sd(reinterpret_cast(&src0[0][w])), reinterpret_cast(&src0[1][w]))); @@ -298,84 +298,36 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[], // horizontal pass __m128i horizontal_shuf_mask = _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); - - for (int x = 0; outSz.width >= nlanes; ) + __m128i horizontal_shuf_mask1 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 3, 7, 11, 15); + constexpr int nproc_pixels = 5; + for (int x = 0; ; ) { - for (; x <= outSz.width - nlanes; x += nlanes) + for (; x <= outSz.width - (nproc_pixels + 1); x += nproc_pixels) { -#ifdef _WIN64 +#ifdef _MSC_VER __m128i a00 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * x]), *reinterpret_cast(&clone[4 * x])); - __m128i a01 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * x]), *reinterpret_cast(&clone[4 * (x + 1)])); - __m128i a11 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 1)]), *reinterpret_cast(&clone[4 * (x + 1)])); - __m128i a22 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 2)])); - __m128i a23 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 3)])); - __m128i a33 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 3)]), *reinterpret_cast(&clone[4 * (x + 3)])); - __m128i a44 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 4)])); - __m128i a45 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 5)])); - __m128i a55 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * 
(x + 5)]), *reinterpret_cast(&clone[4 * (x + 5)])); - __m128i a66 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 6)]), *reinterpret_cast(&clone[4 * (x + 6)])); - __m128i a67 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 6)]), *reinterpret_cast(&clone[4 * (x + 7)])); - __m128i a77 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 7)]), *reinterpret_cast(&clone[4 * (x + 7)])); - __m128i a88 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 8)]), *reinterpret_cast(&clone[4 * (x + 8)])); - __m128i a89 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 8)]), *reinterpret_cast(&clone[4 * (x + 9)])); - __m128i a99 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 9)]), *reinterpret_cast(&clone[4 * (x + 9)])); - __m128i a1010 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 10)]), *reinterpret_cast(&clone[4 * (x + 10)])); - __m128i a1011 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 10)]), *reinterpret_cast(&clone[4 * (x + 11)])); - __m128i a1111 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 11)]), *reinterpret_cast(&clone[4 * (x + 11)])); - __m128i a1212 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 12)]), *reinterpret_cast(&clone[4 * (x + 12)])); - __m128i a1213 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 12)]), *reinterpret_cast(&clone[4 * (x + 13)])); - __m128i a1313 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 13)]), *reinterpret_cast(&clone[4 * (x + 13)])); - __m128i a1414 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 14)]), *reinterpret_cast(&clone[4 * (x + 14)])); - __m128i a1415 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 14)]), *reinterpret_cast(&clone[4 * (x + 15)])); - __m128i a1515 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 15)]), *reinterpret_cast(&clone[4 * (x + 15)])); #else __m128i a00 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * x]), *reinterpret_cast(&clone[4 * x])); - __m128i a01 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * x]), *reinterpret_cast(&clone[4 * (x + 1)])); - 
__m128i a11 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 1)]), *reinterpret_cast(&clone[4 * (x + 1)])); - __m128i a22 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 2)])); - __m128i a23 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 3)])); - __m128i a33 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 3)]), *reinterpret_cast(&clone[4 * (x + 3)])); - __m128i a44 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 4)])); - __m128i a45 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 5)])); - __m128i a55 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 5)]), *reinterpret_cast(&clone[4 * (x + 5)])); - __m128i a66 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 6)]), *reinterpret_cast(&clone[4 * (x + 6)])); - __m128i a67 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 6)]), *reinterpret_cast(&clone[4 * (x + 7)])); - __m128i a77 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 7)]), *reinterpret_cast(&clone[4 * (x + 7)])); - __m128i a88 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 8)]), *reinterpret_cast(&clone[4 * (x + 8)])); - __m128i a89 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 8)]), *reinterpret_cast(&clone[4 * (x + 9)])); - __m128i a99 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 9)]), *reinterpret_cast(&clone[4 * (x + 9)])); - __m128i a1010 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 10)]), *reinterpret_cast(&clone[4 * (x + 10)])); - __m128i a1011 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 10)]), *reinterpret_cast(&clone[4 * (x + 11)])); - __m128i a1111 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 11)]), *reinterpret_cast(&clone[4 * (x + 11)])); - __m128i a1212 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 12)]), *reinterpret_cast(&clone[4 * (x + 12)])); - __m128i a1213 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 12)]), 
*reinterpret_cast(&clone[4 * (x + 13)])); - __m128i a1313 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 13)]), *reinterpret_cast(&clone[4 * (x + 13)])); - __m128i a1414 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 14)]), *reinterpret_cast(&clone[4 * (x + 14)])); - __m128i a1415 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 14)]), *reinterpret_cast(&clone[4 * (x + 15)])); - __m128i a1515 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 15)]), *reinterpret_cast(&clone[4 * (x + 15)])); +#endif + __m128i pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x])])); + __m128i pix2 = _mm_setzero_si128(); +#if defined(__i386__) || defined(_M_IX86) + pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x] + 1))]))); +#else + pix2 = _mm_insert_epi64(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x] + 1))]), 0); #endif - // load 3 channels of first pixel from first pair of 4-couple scope - __m128i pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x])])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix1 = _mm_insert_epi32(pix1, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 1])]), 3); - - // load 3 channels of neighbor pixel from first pair of 4-couple scope - __m128i pix2 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x] + 1))])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))]), 3); + pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x] + 1)) + 8]), 2); // expand 8-bit data to 16-bit __m128i val_0 = _mm_unpacklo_epi8(pix1, zero); __m128i val_1 = _mm_unpacklo_epi8(pix2, zero); - - // expand 8-bit data to 16-bit __m128i val_2 = _mm_unpackhi_epi8(pix1, zero); __m128i val_3 = _mm_unpackhi_epi8(pix2, zero); // the main calculations __m128i t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, 
val_1), a00); - __m128i t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a01); + __m128i t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a00); __m128i r0_0 = _mm_add_epi16(val_1, t0_0); __m128i r1_0 = _mm_add_epi16(val_3, t1_0); @@ -384,111 +336,129 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[], // gather data from the same lines together __m128i res1 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask); - val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 1] + 1)]), 0), zero); - val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 1] + 1) + 1)]), 0), zero); +#ifdef _MSC_VER + __m128i a11 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 1)]), *reinterpret_cast(&clone[4 * (x + 1)])); +#else + __m128i a11 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 1)]), *reinterpret_cast(&clone[4 * (x + 1)])); +#endif - val_2 = _mm_insert_epi64(val_2, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 2])]), 0); - val_3 = _mm_insert_epi64(val_3, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))]), 0); - - val_2 = _mm_unpacklo_epi8(val_2, zero); - val_3 = _mm_unpacklo_epi8(val_3, zero); - - __m128i t0_1 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a11); - __m128i t1_1 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a22); - __m128i r0_1 = _mm_add_epi16(val_1, t0_1); - __m128i r1_1 = _mm_add_epi16(val_3, t1_1); - - __m128i q0_1 = _mm_packus_epi16(r0_1, r1_1); - __m128i res2 = _mm_shuffle_epi8(q0_1, horizontal_shuf_mask); - - __m128i pix7 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 3] - 1) + 2)])); - pix7 = _mm_insert_epi32(pix7, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 2] + 2)]), 0); - - __m128i pix8 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 3] + 2)])); - pix8 = _mm_insert_epi32(pix8, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 2] + 1) + 2)]), 0); - - val_0 = _mm_unpacklo_epi8(pix7, zero); - val_1 
= _mm_unpacklo_epi8(pix8, zero); - - val_2 = _mm_unpackhi_epi8(pix7, zero); - val_3 = _mm_unpackhi_epi8(pix8, zero); - - // the main calculations - __m128i t0_2 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a23); - __m128i t1_2 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a33); - __m128i r0_2 = _mm_add_epi16(val_1, t0_2); - __m128i r1_2 = _mm_add_epi16(val_3, t1_2); - - // pack 16-bit data to 8-bit - __m128i q0_2 = _mm_packus_epi16(r0_2, r1_2); - __m128i res3 = _mm_shuffle_epi8(q0_2, horizontal_shuf_mask); - - __m128i pix9 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 4])])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix9 = _mm_insert_epi32(pix9, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 5])]), 3); - - // load 3 channels of neighbor pixel from first pair of 4-couple scope - __m128i pix10 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix10 = _mm_insert_epi32(pix10, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 5] + 1))]), 3); + pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 1])])); +#if defined(__i386__) || defined(_M_IX86) + pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))]))); +#else + pix2 = _mm_insert_epi64(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 1] + 1))]), 0); +#endif + pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 1] + 1)) + 8]), 2); // expand 8-bit data to 16-bit - val_0 = _mm_unpacklo_epi8(pix9, zero); - val_1 = _mm_unpacklo_epi8(pix10, zero); - - // expand 8-bit data to 16-bit - val_2 = _mm_unpackhi_epi8(pix9, zero); - val_3 = _mm_unpackhi_epi8(pix10, zero); + val_0 = _mm_unpacklo_epi8(pix1, zero); + val_1 = _mm_unpacklo_epi8(pix2, zero); + val_2 = _mm_unpackhi_epi8(pix1, zero); + val_3 = _mm_unpackhi_epi8(pix2, zero); // the main calculations 
- __m128i t0_3 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a44); - __m128i t1_3 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a45); - __m128i r0_3 = _mm_add_epi16(val_1, t0_3); - __m128i r1_3 = _mm_add_epi16(val_3, t1_3); + t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a11); + t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a11); + r0_0 = _mm_add_epi16(val_1, t0_0); + r1_0 = _mm_add_epi16(val_3, t1_0); // pack 16-bit data to 8-bit - __m128i q0_3 = _mm_packus_epi16(r0_3, r1_3); + q0_0 = _mm_packus_epi16(r0_0, r1_0); // gather data from the same lines together - __m128i res4 = _mm_shuffle_epi8(q0_3, horizontal_shuf_mask); + __m128i res2 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask); - val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 5] + 1)]), 0), zero); - val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 5] + 1) + 1)]), 0), zero); +#ifdef _MSC_VER + __m128i a22 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 2)])); +#else + __m128i a22 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 2)]), *reinterpret_cast(&clone[4 * (x + 2)])); +#endif - val_2 = _mm_insert_epi64(val_2, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 6])]), 0); - val_3 = _mm_insert_epi64(val_3, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 6] + 1))]), 0); + pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 2])])); +#if defined(__i386__) || defined(_M_IX86) + pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))]))); +#else + pix2 = _mm_insert_epi64(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 2] + 1))]), 0); +#endif + pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 2] + 1)) + 8]), 2); - val_2 = _mm_unpacklo_epi8(val_2, zero); - val_3 = _mm_unpacklo_epi8(val_3, zero); - - __m128i t0_4 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a55); 
- __m128i t1_4 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a66); - __m128i r0_4 = _mm_add_epi16(val_1, t0_4); - __m128i r1_4 = _mm_add_epi16(val_3, t1_4); - - __m128i q0_4 = _mm_packus_epi16(r0_4, r1_4); - __m128i res5 = _mm_shuffle_epi8(q0_4, horizontal_shuf_mask); - - __m128i pix15 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 7] - 1) + 2)])); - pix15 = _mm_insert_epi32(pix15, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 6] + 2)]), 0); - - __m128i pix16 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 7] + 2)])); - pix16 = _mm_insert_epi32(pix16, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 6] + 1) + 2)]), 0); - - val_0 = _mm_unpacklo_epi8(pix15, zero); - val_1 = _mm_unpacklo_epi8(pix16, zero); - - val_2 = _mm_unpackhi_epi8(pix15, zero); - val_3 = _mm_unpackhi_epi8(pix16, zero); + // expand 8-bit data to 16-bit + val_0 = _mm_unpacklo_epi8(pix1, zero); + val_1 = _mm_unpacklo_epi8(pix2, zero); + val_2 = _mm_unpackhi_epi8(pix1, zero); + val_3 = _mm_unpackhi_epi8(pix2, zero); // the main calculations - __m128i t0_5 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a67); - __m128i t1_5 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a77); - __m128i r0_5 = _mm_add_epi16(val_1, t0_5); - __m128i r1_5 = _mm_add_epi16(val_3, t1_5); + t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a22); + t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a22); + r0_0 = _mm_add_epi16(val_1, t0_0); + r1_0 = _mm_add_epi16(val_3, t1_0); // pack 16-bit data to 8-bit - __m128i q0_5 = _mm_packus_epi16(r0_5, r1_5); - __m128i res6 = _mm_shuffle_epi8(q0_5, horizontal_shuf_mask); + q0_0 = _mm_packus_epi16(r0_0, r1_0); + // gather data from the same lines together + __m128i res3 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask); + +#ifdef _MSC_VER + __m128i a33 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 3)]), *reinterpret_cast(&clone[4 * (x + 3)])); +#else + __m128i a33 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 3)]), 
*reinterpret_cast(&clone[4 * (x + 3)])); +#endif + + pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 3])])); +#if defined(__i386__) || defined(_M_IX86) + pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 3] + 1))]))); +#else + pix2 = _mm_insert_epi64(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 3] + 1))]), 0); +#endif + pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 3] + 1)) + 8]), 2); + + // expand 8-bit data to 16-bit + val_0 = _mm_unpacklo_epi8(pix1, zero); + val_1 = _mm_unpacklo_epi8(pix2, zero); + val_2 = _mm_unpackhi_epi8(pix1, zero); + val_3 = _mm_unpackhi_epi8(pix2, zero); + + // the main calculations + t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a33); + t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a33); + r0_0 = _mm_add_epi16(val_1, t0_0); + r1_0 = _mm_add_epi16(val_3, t1_0); + + // pack 16-bit data to 8-bit + q0_0 = _mm_packus_epi16(r0_0, r1_0); + // gather data from the same lines together + __m128i res4 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask); + +#ifdef _MSC_VER + __m128i a44 = _mm_setr_epi64x(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 4)])); +#else + __m128i a44 = _mm_setr_epi64(*reinterpret_cast(&clone[4 * (x + 4)]), *reinterpret_cast(&clone[4 * (x + 4)])); +#endif + + pix1 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 4])])); +#if defined(__i386__) || defined(_M_IX86) + pix2 = _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))]))); +#else + pix2 = _mm_insert_epi64(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 4] + 1))]), 0); +#endif + pix2 = _mm_insert_epi32(pix2, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 4] + 1)) + 8]), 2); + + // expand 8-bit data to 16-bit + val_0 = _mm_unpacklo_epi8(pix1, zero); + val_1 = _mm_unpacklo_epi8(pix2, zero); + val_2 = _mm_unpackhi_epi8(pix1, zero); + val_3 = _mm_unpackhi_epi8(pix2, zero); + + // the 
main calculations + t0_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a44); + t1_0 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a44); + r0_0 = _mm_add_epi16(val_1, t0_0); + r1_0 = _mm_add_epi16(val_3, t1_0); + + // pack 16-bit data to 8-bit + q0_0 = _mm_packus_epi16(r0_0, r1_0); + // gather data from the same lines together + __m128i res5 = _mm_shuffle_epi8(q0_0, horizontal_shuf_mask); __m128i bl1 = _mm_blend_epi16(res1, _mm_slli_si128(res2, 4), 0xCC /*0b11001100*/); __m128i bl2 = _mm_blend_epi16(_mm_srli_si128(res1, 4), res2, 0xCC /*0b11001100*/); @@ -496,189 +466,47 @@ CV_ALWAYS_INLINE void calcRowLinear_8UC_Impl_<3>(uint8_t* dst[], __m128i bl3 = _mm_blend_epi16(res3, _mm_slli_si128(res4, 4), 0xCC /*0b11001100*/); __m128i bl4 = _mm_blend_epi16(_mm_srli_si128(res3, 4), res4, 0xCC /*0b11001100*/); - __m128i bl5 = _mm_blend_epi16(res5, _mm_slli_si128(res6, 4), 0xCC /*0b11001100*/); - __m128i bl6 = _mm_blend_epi16(_mm_srli_si128(res5, 4), res6, 0xCC /*0b11001100*/); - __m128i bl13 = _mm_blend_epi16(bl1, _mm_slli_si128(bl3, 8), 0xF0 /*0b11110000*/); __m128i bl31 = _mm_blend_epi16(_mm_srli_si128(bl1, 8), bl3, 0xF0 /*0b11110000*/); __m128i bl24 = _mm_blend_epi16(bl2, _mm_slli_si128(bl4, 8), 0xF0 /*0b11110000*/); __m128i bl42 = _mm_blend_epi16(_mm_srli_si128(bl2, 8), bl4, 0xF0 /*0b11110000*/); - // load 3 channels of first pixel from first pair of 4-couple scope - __m128i pix17 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 8])])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix17 = _mm_insert_epi32(pix17, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 9])]), 3); + bl1 = _mm_blend_epi16(_mm_shuffle_epi8(bl13, horizontal_shuf_mask1), + _mm_slli_si128(res5, 12), 192 /*0b11000000*/); + bl2 = _mm_blend_epi16(_mm_shuffle_epi8(bl24, horizontal_shuf_mask1), + _mm_slli_si128(res5, 8), 192 /*0b11000000*/); + bl3 = _mm_blend_epi16(_mm_shuffle_epi8(bl31, horizontal_shuf_mask1), + _mm_slli_si128(res5, 4), 192 
/*0b11000000*/); + bl4 = _mm_blend_epi16(_mm_shuffle_epi8(bl42, horizontal_shuf_mask1), + res5, 192 /*0b11000000*/); - // load 3 channels of neighbor pixel from first pair of 4-couple scope - __m128i pix18 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 8] + 1))])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix18 = _mm_insert_epi32(pix18, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 9] + 1))]), 3); - - // expand 8-bit data to 16-bit - val_0 = _mm_unpacklo_epi8(pix17, zero); - val_1 = _mm_unpacklo_epi8(pix18, zero); - - // expand 8-bit data to 16-bit - val_2 = _mm_unpackhi_epi8(pix17, zero); - val_3 = _mm_unpackhi_epi8(pix18, zero); - - // the main calculations - __m128i t0_6 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a88); - __m128i t1_6 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a89); - __m128i r0_6 = _mm_add_epi16(val_1, t0_6); - __m128i r1_6 = _mm_add_epi16(val_3, t1_6); - - // pack 16-bit data to 8-bit - __m128i q0_6 = _mm_packus_epi16(r0_6, r1_6); - // gather data from the same lines together - __m128i res7 = _mm_shuffle_epi8(q0_6, horizontal_shuf_mask); - - val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 9] + 1)]), 0), zero); - val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 9] + 1) + 1)]), 0), zero); - - val_2 = _mm_insert_epi64(val_2, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 10])]), 0); - val_3 = _mm_insert_epi64(val_3, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 10] + 1))]), 0); - - val_2 = _mm_unpacklo_epi8(val_2, zero); - val_3 = _mm_unpacklo_epi8(val_3, zero); - - __m128i t0_7 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a99); - __m128i t1_7 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1010); - __m128i r0_7 = _mm_add_epi16(val_1, t0_7); - __m128i r1_7 = _mm_add_epi16(val_3, t1_7); - - __m128i q0_7 = _mm_packus_epi16(r0_7, r1_7); - __m128i res8 = 
_mm_shuffle_epi8(q0_7, horizontal_shuf_mask); - - __m128i pix21 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 11] - 1) + 2)])); - pix21 = _mm_insert_epi32(pix21, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 10] + 2)]), 0); - - __m128i pix22 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 11] + 2)])); - pix22 = _mm_insert_epi32(pix22, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 10] + 1) + 2)]), 0); - - val_0 = _mm_unpacklo_epi8(pix21, zero); - val_1 = _mm_unpacklo_epi8(pix22, zero); - - val_2 = _mm_unpackhi_epi8(pix21, zero); - val_3 = _mm_unpackhi_epi8(pix22, zero); - - // the main calculations - __m128i t0_8 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1011); - __m128i t1_8 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1111); - __m128i r0_8 = _mm_add_epi16(val_1, t0_8); - __m128i r1_8 = _mm_add_epi16(val_3, t1_8); - - // pack 16-bit data to 8-bit - __m128i q0_8 = _mm_packus_epi16(r0_8, r1_8); - __m128i res9 = _mm_shuffle_epi8(q0_8, horizontal_shuf_mask); - - __m128i pix23 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 12])])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix23 = _mm_insert_epi32(pix23, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 13])]), 3); - - // load 3 channels of neighbor pixel from first pair of 4-couple scope - __m128i pix24 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 12] + 1))])); - // insert first channel from next couple of pixels to completely fill the simd vector - pix24 = _mm_insert_epi32(pix24, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 13] + 1))]), 3); - - // expand 8-bit data to 16-bit - val_0 = _mm_unpacklo_epi8(pix23, zero); - val_1 = _mm_unpacklo_epi8(pix24, zero); - - // expand 8-bit data to 16-bit - val_2 = _mm_unpackhi_epi8(pix23, zero); - val_3 = _mm_unpackhi_epi8(pix24, zero); - - // the main calculations - __m128i t0_9 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1212); - 
__m128i t1_9 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1213); - __m128i r0_9 = _mm_add_epi16(val_1, t0_9); - __m128i r1_9 = _mm_add_epi16(val_3, t1_9); - - // pack 16-bit data to 8-bit - __m128i q0_9 = _mm_packus_epi16(r0_9, r1_9); - // gather data from the same lines together - __m128i res10 = _mm_shuffle_epi8(q0_9, horizontal_shuf_mask); - - val_0 = _mm_unpacklo_epi8(_mm_insert_epi64(val_0, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 13] + 1)]), 0), zero); - val_1 = _mm_unpacklo_epi8(_mm_insert_epi64(val_1, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 13] + 1) + 1)]), 0), zero); - - val_2 = _mm_insert_epi64(val_2, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 14])]), 0); - val_3 = _mm_insert_epi64(val_3, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 14] + 1))]), 0); - - val_2 = _mm_unpacklo_epi8(val_2, zero); - val_3 = _mm_unpacklo_epi8(val_3, zero); - - __m128i t0_10 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1313); - __m128i t1_10 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1414); - __m128i r0_10 = _mm_add_epi16(val_1, t0_10); - __m128i r1_10 = _mm_add_epi16(val_3, t1_10); - - __m128i q0_10 = _mm_packus_epi16(r0_10, r1_10); - __m128i res11 = _mm_shuffle_epi8(q0_10, horizontal_shuf_mask); - - __m128i pix27 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 15] - 1) + 2)])); - pix27 = _mm_insert_epi32(pix27, *reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 14] + 2)]), 0); - - __m128i pix28 = _mm_lddqu_si128(reinterpret_cast(&tmp[4 * (chanNum * mapsx[x + 15] + 2)])); - pix28 = _mm_insert_epi32(pix28, *reinterpret_cast(&tmp[4 * (chanNum * (mapsx[x + 14] + 1) + 2)]), 0); - - val_0 = _mm_unpacklo_epi8(pix27, zero); - val_1 = _mm_unpacklo_epi8(pix28, zero); - - val_2 = _mm_unpackhi_epi8(pix27, zero); - val_3 = _mm_unpackhi_epi8(pix28, zero); - - // the main calculations - __m128i t0_11 = _mm_mulhrs_epi16(_mm_sub_epi16(val_0, val_1), a1415); - __m128i t1_11 = _mm_mulhrs_epi16(_mm_sub_epi16(val_2, val_3), a1515); - __m128i 
r0_11 = _mm_add_epi16(val_1, t0_11); - __m128i r1_11 = _mm_add_epi16(val_3, t1_11); - - // pack 16-bit data to 8-bit - __m128i q0_11 = _mm_packus_epi16(r0_11, r1_11); - __m128i res12 = _mm_shuffle_epi8(q0_11, horizontal_shuf_mask); - - __m128i bl7 = _mm_blend_epi16(res7, _mm_slli_si128(res8, 4), 0xCC /*0b11001100*/); - __m128i bl8 = _mm_blend_epi16(_mm_srli_si128(res7, 4), res8, 0xCC /*0b11001100*/); - - __m128i bl9 = _mm_blend_epi16(res9, _mm_slli_si128(res10, 4), 0xCC /*0b11001100*/); - __m128i bl10 = _mm_blend_epi16(_mm_srli_si128(res9, 4), res10, 0xCC /*0b11001100*/); - - __m128i bl11 = _mm_blend_epi16(res11, _mm_slli_si128(res12, 4), 0xCC /*0b11001100*/); - __m128i bl12 = _mm_blend_epi16(_mm_srli_si128(res11, 4), res12, 0xCC /*0b11001100*/); - - __m128i bl57 = _mm_blend_epi16(bl5, _mm_slli_si128(bl7, 8), 0xF0 /*0b11110000*/); - __m128i bl75 = _mm_blend_epi16(_mm_srli_si128(bl5, 8), bl7, 0xF0 /*0b11110000*/); - - __m128i bl68 = _mm_blend_epi16(bl6, _mm_slli_si128(bl8, 8), 0xF0 /*0b11110000*/); - __m128i bl86 = _mm_blend_epi16(_mm_srli_si128(bl6, 8), bl8, 0xF0 /*0b11110000*/); - - __m128i bl911 = _mm_blend_epi16(bl9, _mm_slli_si128(bl11, 8), 0xF0 /*0b11110000*/); - __m128i bl119 = _mm_blend_epi16(_mm_srli_si128(bl9, 8), bl11, 0xF0 /*0b11110000*/); - - __m128i bl1012 = _mm_blend_epi16(bl10, _mm_slli_si128(bl12, 8), 0xF0 /*0b11110000*/); - __m128i bl1210 = _mm_blend_epi16(_mm_srli_si128(bl10, 8), bl12, 0xF0 /*0b11110000*/); - - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x]), bl13); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x]), bl24); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x]), bl31); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x]), bl42); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x + 16]), bl57); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x + 16]), bl68); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x + 16]), bl75); - 
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x + 16]), bl86); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][3 * x + 32]), bl911); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][3 * x + 32]), bl1012); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][3 * x + 32]), bl119); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][3 * x + 32]), bl1210); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[0][chanNum * x]), bl1); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[1][chanNum * x]), bl2); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[2][chanNum * x]), bl3); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&dst[3][chanNum * x]), bl4); } - if (x < outSz.width) { - x = outSz.width - nlanes; - continue; + for (; x < outSz.width; ++x) + { + constexpr static const int ONE = 1 << 15; + constexpr static const int half = 1 << 14; + auto alpha0 = alpha[x]; + auto alpha1 = saturate_cast(ONE - alpha[x]); + + for (int c = 0; c < chanNum; ++c) + { + dst[0][chanNum * x + c] = (tmp[4 * (chanNum * mapsx[x] + c) ] * alpha0 + + tmp[4 * (chanNum * (mapsx[x] + 1) + c) ] * alpha1 + half) >> 15; + dst[1][chanNum * x + c] = (tmp[4 * (chanNum * mapsx[x] + c) + 1] * alpha0 + + tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 1] * alpha1 + half) >> 15; + dst[2][chanNum * x + c] = (tmp[4 * (chanNum * mapsx[x] + c) + 2] * alpha0 + + tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 2] * alpha1 + half) >> 15; + dst[3][chanNum * x + c] = (tmp[4 * (chanNum * mapsx[x] + c) + 3] * alpha0 + + tmp[4 * (chanNum * (mapsx[x] + 1) + c) + 3] * alpha1 + half) >> 15; + } } + break; } } diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp index df44e57259..aca2dcca6f 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp @@ -1026,8 +1026,8 @@ GAPI_FLUID_KERNEL(GFluidSobel, cv::gapi::imgproc::GSobel, true) auto *kx = scratch.OutLine(); auto *ky = kx + ksz; - Mat 
kxmat(1, ksize, CV_32FC1, kx); - Mat kymat(ksize, 1, CV_32FC1, ky); + Mat kxmat(1, ksz, CV_32FC1, kx); + Mat kymat(ksz, 1, CV_32FC1, ky); getDerivKernels(kxmat, kymat, dx, dy, ksize); } @@ -1185,12 +1185,12 @@ GAPI_FLUID_KERNEL(GFluidSobelXY, cv::gapi::imgproc::GSobelXY, true) auto *kx_dy = buf_helper.kx_dy; auto *ky_dy = buf_helper.ky_dy; - Mat kxmatX(1, ksize, CV_32FC1, kx_dx); - Mat kymatX(ksize, 1, CV_32FC1, ky_dx); + Mat kxmatX(1, ksz, CV_32FC1, kx_dx); + Mat kymatX(ksz, 1, CV_32FC1, ky_dx); getDerivKernels(kxmatX, kymatX, order, 0, ksize); - Mat kxmatY(1, ksize, CV_32FC1, kx_dy); - Mat kymatY(ksize, 1, CV_32FC1, ky_dy); + Mat kxmatY(1, ksz, CV_32FC1, kx_dy); + Mat kymatY(ksz, 1, CV_32FC1, ky_dy); getDerivKernels(kxmatY, kymatY, 0, order, ksize); } @@ -2017,14 +2017,13 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, dst[l] = out.OutLine(l); } -#if 0 // Disabling SSE4.1 path due to Valgrind issues: https://github.com/opencv/opencv/issues/21097 #if CV_SSE4_1 const auto* clone = scr.clone; auto* tmp = scr.tmp; if (inSz.width >= 16 && outSz.width >= 16) { - sse42::calcRowLinear_8UC_Impl_(reinterpret_cast(dst), + sse41::calcRowLinear_8UC_Impl_(reinterpret_cast(dst), reinterpret_cast(src0), reinterpret_cast(src1), reinterpret_cast(alpha), @@ -2037,7 +2036,6 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, return; } #endif // CV_SSE4_1 -#endif int length = out.length(); for (int l = 0; l < lpi; l++) { constexpr static const auto unity = Mapper::unity; @@ -2080,8 +2078,8 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) int outSz_h; if (outSz.width == 0 || outSz.height == 0) { - outSz_w = static_cast(round(in.size.width * fx)); - outSz_h = static_cast(round(in.size.height * fy)); + outSz_w = saturate_cast(in.size.width * fx); + outSz_h = saturate_cast(in.size.height * fy); } else { diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 929a18f721..b155ff0aea 100644 
--- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "precomp.hpp" @@ -19,6 +19,8 @@ #include #include #include +#include + #include @@ -180,6 +182,10 @@ inline IE::Blob::Ptr wrapIE(const cv::MediaFrame::View& view, auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]); return cv::gapi::ie::util::to_ie(y_plane, uv_plane); } + case cv::MediaFormat::GRAY: { + auto gray = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]); + return wrapIE(gray, cv::gapi::ie::TraitAs::IMAGE); + } default: GAPI_Assert(false && "Unsupported media format for IE backend"); } @@ -210,6 +216,39 @@ inline void copyFromIE(const IE::Blob::Ptr &blob, MatType &mat) { } } +template +void checkLayerNames(const MapT& network_map, + const std::vector& layer_names, + const std::string& layer_type) { + for (const auto& layer_name : layer_names) { + const auto it = network_map.find(layer_name); + if (it == network_map.end()) { + std::stringstream ss; + ss << "Failed to find " << layer_type << " layer with name: " + << "\"" << layer_name << "\"" << std::endl; + ss << "Network " << layer_type << " layers: " << std::endl; + for (const auto& p : network_map) { + const auto& desc = p.second->getTensorDesc(); + ss << p.first << " : " << desc.getPrecision() + << " / " << desc.getLayout() << std::endl; + } + throw std::logic_error(ss.str()); + } + } +} + +template +void checkInputLayerNames(const MapT& network_map, + const std::vector& layer_names) { + checkLayerNames(network_map, layer_names, "input"); +} + +template +void checkOutputLayerNames(const MapT& network_map, + const std::vector& layer_names) { + checkLayerNames(network_map, layer_names, "output"); +} + // 
IE-specific metadata, represents a network with its parameters struct IEUnit { static const char *name() { return "IEModelConfig"; } @@ -287,6 +326,16 @@ struct IEUnit { params.num_in && "Number of layers to reshape must be less than or equal to number of inputs"); } + + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + checkInputLayerNames(net.getInputsInfo(), params.input_names); + checkOutputLayerNames(net.getOutputsInfo(), params.output_names); + } else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) { + checkInputLayerNames(this_network.GetInputsInfo(), params.input_names); + checkOutputLayerNames(this_network.GetOutputsInfo(), params.output_names); + } else { + cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); + } } // This method is [supposed to be] called at Island compilation stage @@ -505,20 +554,27 @@ inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { "Remote blob is supported for MediaFrame only"); cv::util::any any_blob_params = ctx.inFrame(i).blobParams(); - auto ie_core = cv::gimpl::ie::wrap::getCore(); - using ParamType = std::pair; + using ParamType = std::pair; + using NV12ParamType = std::pair; - ParamType* blob_params = cv::util::any_cast(&any_blob_params); + NV12ParamType* blob_params = cv::util::any_cast(&any_blob_params); if (blob_params == nullptr) { - GAPI_Assert(false && "Incorrect type of blobParams: " - "expected std::pair"); + GAPI_Assert(false && "Incorrect type of blobParams:" + "expected std::pair," + "with ParamType std::pair>"); } - return ctx.uu.rctx->CreateBlob(blob_params->first, - blob_params->second); + //The parameters are TensorDesc and ParamMap for both y and uv blobs + auto y_blob = ctx.uu.rctx->CreateBlob(blob_params->first.first, blob_params->first.second); + auto uv_blob = ctx.uu.rctx->CreateBlob(blob_params->second.first, blob_params->second.second); + +#if INF_ENGINE_RELEASE >= 2021010000 + return IE::make_shared_blob(y_blob, uv_blob); 
+#else + return IE::make_shared_blob(y_blob, uv_blob); +#endif } inline IE::Blob::Ptr extractBlob(IECallContext& ctx, @@ -560,6 +616,19 @@ static void setBlob(InferenceEngine::InferRequest& req, } } +static void setROIBlob(InferenceEngine::InferRequest& req, + const std::string& layer_name, + const IE::Blob::Ptr& blob, + const cv::Rect &roi, + const IECallContext& ctx) { + if (ctx.uu.params.device_id.find("GPU") != std::string::npos) { + GAPI_LOG_DEBUG(nullptr, "Skip ROI blob creation for device_id: " << + ctx.uu.params.device_id << ", layer: " << layer_name); + setBlob(req, layer_name, blob, ctx); + } else { + setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); + } +} } // anonymous namespace std::vector cv::gimpl::ie::IECompiled::createInferRequests() { @@ -601,7 +670,10 @@ public: void waitAll(); private: - void callback(Task task, InferenceEngine::InferRequest& request, size_t id); + void callback(Task task, + size_t id, + IE::InferRequest request, + IE::StatusCode code); void setup(); QueueClass m_idle_ids; @@ -626,21 +698,38 @@ void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) { auto& request = m_requests[id]; + using namespace std::placeholders; + using callback_t = std::function; request.SetCompletionCallback( - std::bind(&cv::gimpl::ie::RequestPool::callback, this, t, std::ref(request), id)); + static_cast( + std::bind(&cv::gimpl::ie::RequestPool::callback, this, + t, id, _1, _2))); t.run(request); } void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task, - InferenceEngine::InferRequest& request, - size_t id) { - task.callback(request); - // NB: IE::InferRequest keeps the callback until the new one is set. - // Since user's callback might keep resources that should be released, - // need to destroy its after execution. - // Let's set the empty one to cause the destruction of a callback. 
- request.SetCompletionCallback([](){}); - m_idle_ids.push(id); + size_t id, + IE::InferRequest request, + IE::StatusCode code) { + // FIXME: Any exception which is arrised here must not leave this callback, + // because it won't be handled. + try { + if (code != IE::StatusCode::OK) { + throw std::logic_error("IE::InferRequest finished with not OK status"); + } + task.callback(request); + // NB: IE::InferRequest keeps the callback until the new one is set. + // Since user's callback might keep resources that should be released, + // need to destroy its after execution. + // Let's set the empty one to cause the destruction of a callback. + request.SetCompletionCallback([](){}); + m_idle_ids.push(id); + } catch (const std::exception& e) { + GAPI_LOG_FATAL(NULL, "Callback failed with error: " << e.what()); + //FIXME: Exception CAN't be rethrown here, since this callback works + // in separate IE thread and such scenarios aren't handled properly in + // G-API so far. + } } // NB: Not thread-safe. 
@@ -815,6 +904,9 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg case cv::MediaFormat::BGR: // NB: Do nothing break; + case cv::MediaFormat::GRAY: + // NB: Do nothing + break; default: GAPI_Assert(false && "Unsupported media format for IE backend"); } @@ -826,6 +918,13 @@ static void configureInputInfo(const IE::InputInfo::Ptr& ii, const cv::GMetaArg } } +static bool isApplicableForResize(const IE::TensorDesc& desc) { + const auto layout = desc.getLayout(); + const auto prec = desc.getPrecision(); + return (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) && + (prec == IE::Precision::FP32 || prec == IE::Precision::U8); +} + static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, const cv::GMetaArg& mm) { IE::PreProcessInfo info; @@ -835,9 +934,7 @@ static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, info.setColorFormat(IE::ColorFormat::NV12); } } - const auto layout = ii->getTensorDesc().getLayout(); - if (layout == IE::Layout::NCHW || - layout == IE::Layout::NHWC) { + if (isApplicableForResize(ii->getTensorDesc())) { info.setResizeAlgorithm(IE::RESIZE_BILINEAR); } return info; @@ -957,11 +1054,7 @@ struct Infer: public cv::detail::KernelTag { configureInputReshapeByImage(ii, mm, input_reshape_table); } - // NB: Configure resize only for NCHW/NHWC layout, - // since it isn't supposed to work with others. 
- auto layout = ii->getTensorDesc().getLayout(); - if (layout == IE::Layout::NCHW || - layout == IE::Layout::NHWC) { + if (isApplicableForResize(ii->getTensorDesc())) { ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); } } @@ -1066,7 +1159,9 @@ struct InferROI: public cv::detail::KernelTag { uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + if (isApplicableForResize(ii->getTensorDesc())) { + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + } // FIXME: This isn't the best place to call reshape function. // Сorrect solution would be to do this in compile() method of network, @@ -1114,10 +1209,9 @@ struct InferROI: public cv::detail::KernelTag { // it should be treated as image IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE); - setBlob(req, - *(ctx->uu.params.input_names.begin()), - IE::make_shared_blob(this_blob, toIE(this_roi)), - *ctx); + setROIBlob(req, + *(ctx->uu.params.input_names.begin()), + this_blob, this_roi, *ctx); // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? req.StartAsync(); @@ -1165,7 +1259,9 @@ struct InferList: public cv::detail::KernelTag { uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm, input_reshape_table); } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + if (isApplicableForResize(ii->getTensorDesc())) { + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + } } // FIXME: This isn't the best place to call reshape function. 
@@ -1318,7 +1414,9 @@ struct InferList2: public cv::detail::KernelTag { uu.params.layer_names_to_reshape.end()) { configureInputReshapeByImage(ii, mm_0, input_reshape_table); } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + if (isApplicableForResize(ii->getTensorDesc())) { + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + } // FIXME: This isn't the best place to call reshape function. // Сorrect solution would be to do this in compile() method of network, diff --git a/modules/gapi/src/backends/oak/goak.cpp b/modules/gapi/src/backends/oak/goak.cpp new file mode 100644 index 0000000000..6d9044aefa --- /dev/null +++ b/modules/gapi/src/backends/oak/goak.cpp @@ -0,0 +1,47 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include +#include + +#include "oak_media_adapter.hpp" + +#include +#include + +namespace cv { +namespace gapi { +namespace oak { + +GArray encode(const GFrame& in, const EncoderConfig& cfg) { + return GEncFrame::on(in, cfg); +} + +GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk) { + return GSobelXY::on(in, hk, vk); +} + +// This is a dummy oak::ColorCamera class that just makes our pipelining +// machinery work. The real data comes from the physical camera which +// is handled by DepthAI library. 
+ColorCamera::ColorCamera() + : m_dummy(cv::MediaFrame::Create()) { +} + +bool ColorCamera::pull(cv::gapi::wip::Data &data) { + // FIXME: Avoid passing this formal frame to the pipeline + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + data = m_dummy; + return true; +} + +cv::GMetaArg ColorCamera::descr_of() const { + return cv::GMetaArg{cv::descr_of(m_dummy)}; +} + +} // namespace oak +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/backends/oak/goak_media_adapter.cpp b/modules/gapi/src/backends/oak/goak_media_adapter.cpp new file mode 100644 index 0000000000..c8e6bbb59b --- /dev/null +++ b/modules/gapi/src/backends/oak/goak_media_adapter.cpp @@ -0,0 +1,32 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include "oak_media_adapter.hpp" + +namespace cv { +namespace gapi { +namespace oak { + +OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer) { + GAPI_Assert(fmt == cv::MediaFormat::NV12 && "OAKMediaAdapter only supports NV12 format for now"); + m_sz = sz; + m_fmt = fmt; + m_buffer = buffer; +} + +MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) { + uint8_t* y_ptr = m_buffer.data(); + uint8_t* uv_ptr = m_buffer.data() + static_cast(m_buffer.size() / 3 * 2); + return MediaFrame::View{cv::MediaFrame::View::Ptrs{y_ptr, uv_ptr}, + cv::MediaFrame::View::Strides{static_cast(m_sz.width), + static_cast(m_sz.width)}}; +} + +cv::GFrameDesc OAKMediaAdapter::OAKMediaAdapter::meta() const { return {m_fmt, m_sz}; } + +} // namespace oak +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/backends/oak/goakbackend.cpp b/modules/gapi/src/backends/oak/goakbackend.cpp new file mode 100644 index 0000000000..dc0daaead3 --- /dev/null +++ 
b/modules/gapi/src/backends/oak/goakbackend.cpp @@ -0,0 +1,711 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include // GKernelPackage + +#ifdef HAVE_OAK + +#include +#include +#include // any_of +#include // reference_wrapper + +#include + +#include +#include + +#include // streaming::meta_tag + +#include "depthai/depthai.hpp" + +#include +#include "oak_media_adapter.hpp" + +namespace cv { namespace gimpl { + +// Forward declaration +class GOAKContext; +struct OAKNodeInfo; + +class GOAKExecutable final: public GIslandExecutable { + friend class GOAKContext; + virtual void run(std::vector&&, + std::vector&&) override { + GAPI_Assert(false && "Not implemented"); + } + + virtual void run(GIslandExecutable::IInput &in, + GIslandExecutable::IOutput &out) override; + + void LinkToParents(ade::NodeHandle handle); + + class ExtractTypeHelper : protected dai::Node { + public: + using Input = dai::Node::Input; + using Output = dai::Node::Output; + using InputPtr = dai::Node::Input*; + using OutputPtr = dai::Node::Output*; + }; + + struct OAKNodeInfo { + std::shared_ptr node = nullptr; + std::vector inputs = {}; + std::vector outputs = {}; + }; + + struct OAKOutQueueInfo { + std::shared_ptr xlink_output; + std::shared_ptr out_queue; + std::string out_queue_name; + }; + + cv::GArg packInArg(const GArg &arg, std::vector& oak_ins); + void packOutArg(const RcDesc &rc, std::vector& oak_outs); + + const ade::Graph& m_g; + GModel::ConstGraph m_gm; + cv::GCompileArgs m_args; + + std::unordered_map> m_oak_nodes; + + // Will be reworked later when XLinkIn will be introduced as input + std::shared_ptr m_camera_input; + cv::Size m_camera_size; + + // Backend outputs + std::vector m_out_queues; + + // Backend inputs + std::vector> m_in_queues; + + // Note: dai::Pipeline should be the only 
one for the whole pipeline, + // so there is no way to insert any non-OAK node in graph between other OAK nodes. + // The only heterogeneous case possible is if we insert other backends after or before + // OAK island. + std::unique_ptr m_device; + std::unique_ptr m_pipeline; + +public: + GOAKExecutable(const ade::Graph& g, + const cv::GCompileArgs& args, + const std::vector& nodes, + const std::vector& ins_data, + const std::vector& outs_data); + ~GOAKExecutable() = default; + + // FIXME: could it reshape? + virtual bool canReshape() const override { return false; } + virtual void reshape(ade::Graph&, const GCompileArgs&) override { + GAPI_Assert(false && "GOAKExecutable::reshape() is not supported"); + } + + virtual void handleNewStream() override; + virtual void handleStopStream() override; +}; + +class GOAKContext { +public: + // FIXME: make private? + using Input = GOAKExecutable::ExtractTypeHelper::Input; + using Output = GOAKExecutable::ExtractTypeHelper::Output; + using InputPtr = GOAKExecutable::ExtractTypeHelper::Input*; + using OutputPtr = GOAKExecutable::ExtractTypeHelper::Output*; + + GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + std::vector& args, + std::vector& results); + + // Generic accessor API + template + T& inArg(int input) { return m_args.at(input).get(); } + + // FIXME: consider not using raw pointers + InputPtr& in(int input); + OutputPtr& out(int output); + + const std::unique_ptr& pipeline(); + const cv::Size& camera_size() const; + +private: + const std::unique_ptr& m_pipeline; + const cv::Size& m_camera_size; + std::vector& m_args; + std::vector& m_outputs; +}; + +GOAKContext::GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + std::vector& args, + std::vector& results) + : m_pipeline(pipeline), m_camera_size(camera_size), m_args(args), m_outputs(results) {} + +const std::unique_ptr& GOAKContext::pipeline() { + return m_pipeline; +} + +const cv::Size& GOAKContext::camera_size() 
const { + return m_camera_size; +} + +GOAKContext::InputPtr& GOAKContext::in(int input) { + return inArg>(input).get(); +} + +GOAKContext::OutputPtr& GOAKContext::out(int output) { + return m_outputs.at(output); +} + +namespace detail { +template struct get_in; +template<> struct get_in { + static GOAKContext::InputPtr& get(GOAKContext &ctx, int idx) { return ctx.in(idx); } +}; +template struct get_in { + static T get(GOAKContext &ctx, int idx) { return ctx.inArg(idx); } +}; +// FIXME: add support of other types + +template struct get_out; +template<> struct get_out { + static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } +}; +template struct get_out> { + static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } +}; +// FIXME: add support of other types + +struct OAKKernelParams { + const std::unique_ptr& pipeline; + const cv::Size& camera_size; + std::vector>& m_in_queues; +}; + +template +struct OAKCallHelper; + +template +struct OAKCallHelper, std::tuple > { + template + static std::shared_ptr construct_impl( GOAKContext &ctx + , std::vector>& in_queues_params + , cv::detail::Seq + , cv::detail::Seq) { + return Impl::put(OAKKernelParams{ctx.pipeline(), + ctx.camera_size(), + in_queues_params}, + get_in::get(ctx, IIs)..., + get_out::get(ctx, OIs)...); + } + + static std::shared_ptr construct(GOAKContext &ctx, + std::vector>& in_queues_params) { + return construct_impl(ctx, + in_queues_params, + typename cv::detail::MkSeq::type(), + typename cv::detail::MkSeq::type()); + } +}; + +} // namespace detail + +struct GOAKKernel { + using F = std::function(GOAKContext&, + std::vector>&)>; + explicit GOAKKernel(const F& f) : m_put_f(f) {} + const F m_put_f; +}; + +struct OAKComponent +{ + static const char *name() { return "OAK Component"; } + GOAKKernel k; +}; + +}} // namespace gimpl // namespace cv + +using OAKGraph = ade::TypedGraph + < cv::gimpl::OAKComponent + // FIXME: extend + >; + +using ConstOAKGraph 
= ade::ConstTypedGraph + < cv::gimpl::OAKComponent + // FIXME: extend + >; + +// This function links dai operation nodes - parent's output to child's input. +// It utilizes G-API graph to search for operation's node it's previous operation in graph +// when links them in dai graph. +void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) +{ + ade::NodeHandle parent; + for (const auto& data_nh : handle.get()->inNodes()) { + // Data node has only 1 input + GAPI_Assert(data_nh.get()->inNodes().size() == 1); + parent = data_nh.get()->inNodes().front(); + + // Assuming that OAK nodes are aligned for linking. + // FIXME: potential rework might be needed then + // counterexample is found. + GAPI_Assert(m_oak_nodes.at(handle).inputs.size() == + m_oak_nodes.at(parent).outputs.size() && + "Internal OAK nodes are not aligned for linking"); + for (auto && it : ade::util::zip(ade::util::toRange(m_oak_nodes.at(parent).outputs), + ade::util::toRange(m_oak_nodes.at(handle).inputs))) + { + auto &out = std::get<0>(it); + auto &in = std::get<1>(it); + out->link(*in); + } + } +} + +cv::GArg +cv::gimpl::GOAKExecutable::packInArg(const GArg &arg, + std::vector& oak_ins) { + if (arg.kind != cv::detail::ArgKind::GOBJREF) { + GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT + && arg.kind != cv::detail::ArgKind::GSCALAR + && arg.kind != cv::detail::ArgKind::GARRAY + && arg.kind != cv::detail::ArgKind::GOPAQUE + && arg.kind != cv::detail::ArgKind::GFRAME); + // All other cases - pass as-is, with no transformations to + // GArg contents. 
+ return const_cast(arg); + } + const cv::gimpl::RcDesc &ref = arg.get(); + switch (ref.shape) { + case GShape::GFRAME: + oak_ins.push_back(nullptr); + return GArg(std::reference_wrapper(oak_ins.back())); + break; + default: + util::throw_error(std::logic_error("Unsupported GShape type in OAK backend")); + break; + } +} + +void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc, + std::vector& oak_outs) { + switch (rc.shape) { + case GShape::GFRAME: + oak_outs.push_back(nullptr); + break; + case GShape::GARRAY: + oak_outs.push_back(nullptr); + break; + default: + util::throw_error(std::logic_error("Unsupported GShape type in OAK backend")); + break; + } +} + +cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, + const cv::GCompileArgs &args, + const std::vector& nodes, + const std::vector& ins_data, + const std::vector& outs_data) + : m_g(g), m_gm(m_g), m_args(args), + m_device(nullptr), m_pipeline(new dai::Pipeline) + { + // FIXME: currently OAK backend only works with camera as input, + // so it must be a single object + GAPI_Assert(ins_data.size() == 1); + + // Check that there is only one OAK island in graph since there + // can only be one instance of dai::Pipeline in the application + auto isl_graph = m_gm.metadata().get().model; + GIslandModel::Graph gim(*isl_graph); + size_t oak_islands = 0; + + for (const auto& nh : gim.nodes()) + { + if (gim.metadata(nh).get().k == NodeKind::ISLAND) + { + const auto isl = gim.metadata(nh).get().object; + if (isl->backend() == cv::gapi::oak::backend()) + { + ++oak_islands; + } + if (oak_islands > 1) { + util::throw_error + (std::logic_error + ("There can only be one OAK island in graph")); + } + } + } + + // FIXME: change the hard-coded behavior (XLinkIn path) + auto camRgb = m_pipeline->create(); + // FIXME: extract camera compile arguments here and properly convert them for dai + camRgb->setBoardSocket(dai::CameraBoardSocket::RGB); + 
camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); + + // Set camera output. Fixme: consider working with other camera outputs + m_camera_input = camRgb; + // FIXME: change when other camera censors are introduced + std::tuple video_size = camRgb->getVideoSize(); + m_camera_size = cv::Size{std::get<0>(video_size), std::get<1>(video_size)}; + + // Prepare XLinkOut nodes for each output object in graph + for (size_t i = 0; i < outs_data.size(); ++i) { + auto xout = m_pipeline->create(); + std::string xout_name = "xout" + std::to_string(i); + xout->setStreamName(xout_name); + m_out_queues.push_back({xout, nullptr, xout_name}); + } + + // Create OAK node for each node in this backend + for (const auto& nh : nodes) { + if (m_gm.metadata(nh).get().t == NodeType::OP) { + const auto& op = m_gm.metadata(nh).get(); + const auto &u = ConstOAKGraph(m_g).metadata(nh).get(); + // pass kernel input args and compile args to prepare OAK node and + // store it to link later + m_oak_nodes[nh] = {}; + m_oak_nodes.at(nh).inputs.reserve(op.args.size()); + m_oak_nodes.at(nh).outputs.reserve(op.outs.size()); + + std::vector in_ctx_args; + in_ctx_args.reserve(op.args.size()); + for (auto &op_arg : op.args) in_ctx_args.push_back(packInArg(op_arg, + m_oak_nodes.at(nh).inputs)); + for (auto &&op_out : op.outs) packOutArg(op_out, m_oak_nodes.at(nh).outputs); + GAPI_Assert(!m_oak_nodes.at(nh).inputs.empty()); + GAPI_Assert(!m_oak_nodes.at(nh).outputs.empty()); + + GOAKContext ctx(m_pipeline, m_camera_size, in_ctx_args, m_oak_nodes.at(nh).outputs); + m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); + GAPI_Assert(m_oak_nodes.at(nh).node != nullptr); + + // Check that all inputs and outputs are properly filled after constructing kernels + // to then link it together + // FIXME: add more logging + const auto& node = m_oak_nodes.at(nh); + if (std::any_of(node.inputs.cbegin(), node.inputs.cend(), + [](ExtractTypeHelper::InputPtr ptr) { + return ptr == nullptr; + 
})) { + GAPI_Assert(false && "DAI input are not set"); + } + if (std::any_of(node.outputs.cbegin(), node.outputs.cend(), + [](ExtractTypeHelper::OutputPtr ptr) { + return ptr == nullptr; + })) { + GAPI_Assert(false && "DAI outputs are not set"); + } + } + } + + // Prepare nodes for linking + std::unordered_set> in_nodes; + std::unordered_set> out_nodes; + std::unordered_set> inter_nodes; + + // TODO: optimize this loop + for (const auto& node : m_oak_nodes) { + auto nh = node.first; + // Fill input op nodes + for (const auto& d : ins_data) { + for (const auto& indata : nh.get()->inNodes()) { + auto rc = m_gm.metadata(indata).get().rc; + if (rc == d.rc) { + in_nodes.insert(nh); + } + } + } + // Fill output op nodes + for (const auto& d : outs_data) { + for (const auto& outdata : nh.get()->outNodes()) { + auto rc = m_gm.metadata(outdata).get().rc; + if (rc == d.rc) { + out_nodes.insert(nh); + } + } + } + // Fill internal op nodes + if (in_nodes.find(nh) == in_nodes.end() && + out_nodes.find(nh) == in_nodes.end()) { + inter_nodes.insert(nh); + } + } + + // Properly link all nodes + // 1. Link input nodes to camera + for (const auto& nh : in_nodes) { + GAPI_Assert(m_oak_nodes.at(nh).inputs.size() == 1); + // FIXME: covert other camera outputs + m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); + } + + // 2. Link output nodes to XLinkOut nodes + size_t out_counter = 0; + for (const auto& nh : out_nodes) { + GAPI_Assert(out_counter + m_oak_nodes.at(nh).outputs.size() <= m_out_queues.size()); + for (const auto& out : m_oak_nodes.at(nh).outputs) { + out->link(m_out_queues[out_counter++].xlink_output->input); + } + // Input nodes in OAK doesn't have parent operation - just camera (for now) + if (in_nodes.find(nh) == in_nodes.end()) { + LinkToParents(nh); + } + } + + // 3. 
Link internal nodes to their parents + for (const auto& nh : inter_nodes) { + // Input nodes in OAK doesn't have parent operation - just camera (for now) + if (in_nodes.find(nh) == in_nodes.end()) { + LinkToParents(nh); + } + } + + m_device = std::unique_ptr(new dai::Device(*m_pipeline)); + + // Prepare OAK output queues + GAPI_Assert(m_out_queues.size() == outs_data.size()); + for (const auto out_it : ade::util::indexed(outs_data)) + { + auto& q = m_out_queues[ade::util::index(out_it)]; + GAPI_Assert(q.out_queue == nullptr); // shouldn't be not filled till this point + // FIXME: add queue parameters + // Currently: 30 - max DAI queue capacity, true - blocking queue + q.out_queue = m_device->getOutputQueue(q.out_queue_name, 30, true); + } + } + +void cv::gimpl::GOAKExecutable::handleNewStream() { + // do nothing +} + +void cv::gimpl::GOAKExecutable::handleStopStream() { + // do nothing +} + +void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput &in, + GIslandExecutable::IOutput &out) { + const auto in_msg = in.get(); + + if (cv::util::holds_alternative(in_msg)) { + out.post(cv::gimpl::EndOfStream{}); + return; + } + + for (const auto& in_q : m_in_queues) { + auto q = m_device->getInputQueue(in_q.first); + q->send(in_q.second); + } + + for (size_t i = 0; i < m_out_queues.size(); ++i) { + auto q = m_out_queues[i].out_queue; + // TODO: support other DAI types if needed + // Note: we utilize getData() method that returns std::vector of data + // on which we gain ownership + auto oak_frame = q->get(); + + auto out_arg = out.get(i); + + switch(out_arg.index()) { + case cv::GRunArgP::index_of(): + // FIXME: hard-coded NV12 + *cv::util::get(out_arg) = + cv::MediaFrame::Create( + cv::Size(static_cast(oak_frame->getWidth()), + static_cast(oak_frame->getHeight())), + cv::MediaFormat::NV12, + std::move(oak_frame->getData())); + break; + case cv::GRunArgP::index_of(): + cv::util::get(out_arg).wref() = std::move(oak_frame->getData()); + break; + // FIXME: Add support for 
remaining types + default: + GAPI_Assert(false && "Unsupported type in OAK backend"); + } + + using namespace cv::gapi::streaming::meta_tag; + cv::GRunArg::Meta meta; + meta[timestamp] = oak_frame->getTimestamp(); + meta[seq_id] = oak_frame->getSequenceNum(); + + out.meta(out_arg, meta); + out.post(std::move(out_arg)); + } +} + +// Built-in kernels for OAK ///////////////////////////////////////////////////// + +class GOAKBackendImpl final : public cv::gapi::GBackend::Priv { + virtual void unpackKernel(ade::Graph &graph, + const ade::NodeHandle &op_node, + const cv::GKernelImpl &impl) override { + OAKGraph gm(graph); + + const auto &kimpl = cv::util::any_cast(impl.opaque); + gm.metadata(op_node).set(cv::gimpl::OAKComponent{kimpl}); + } + + virtual EPtr compile(const ade::Graph &graph, + const cv::GCompileArgs &args, + const std::vector &nodes, + const std::vector& ins_data, + const std::vector& outs_data) const override { + cv::gimpl::GModel::ConstGraph gm(graph); + // FIXME: pass streaming/non-streaming option to support non-camera case + // NB: how could we have non-OAK source in streaming mode, then OAK backend in + // streaming mode but without camera input? 
+ if (!gm.metadata().contains()) { + GAPI_Assert(false && "OAK backend only supports Streaming mode for now"); + } + return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)}; + } +}; + +cv::gapi::GBackend cv::gapi::oak::backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} + +namespace cv { +namespace gimpl { +namespace oak { + +namespace { +static dai::VideoEncoderProperties::Profile convertEncProfile(cv::gapi::oak::EncoderConfig::Profile pf) { + switch (pf) { + case cv::gapi::oak::EncoderConfig::Profile::H264_BASELINE: + return dai::VideoEncoderProperties::Profile::H264_BASELINE; + case cv::gapi::oak::EncoderConfig::Profile::H264_HIGH: + return dai::VideoEncoderProperties::Profile::H264_HIGH; + case cv::gapi::oak::EncoderConfig::Profile::H264_MAIN: + return dai::VideoEncoderProperties::Profile::H264_MAIN; + case cv::gapi::oak::EncoderConfig::Profile::H265_MAIN: + return dai::VideoEncoderProperties::Profile::H265_MAIN; + case cv::gapi::oak::EncoderConfig::Profile::MJPEG: + return dai::VideoEncoderProperties::Profile::MJPEG; + default: + // basically unreachable; 'false &&' is required - a bare string literal is always true and would never trip the assert + GAPI_Assert(false && "Unsupported encoder profile"); + return {}; + } +} +} // anonymous namespace + +// Kernels /////////////////////////////////////////////////////////////// + +template +class GOAKKernelImpl: public detail::OAKCallHelper + , public cv::detail::KernelTag { + using P = detail::OAKCallHelper; +public: + using API = K; + static cv::gapi::GBackend backend() { return cv::gapi::oak::backend(); } + static GOAKKernel kernel() { return GOAKKernel(&P::construct); } +}; + +#define GAPI_OAK_KERNEL(Name, API) \ + struct Name: public cv::gimpl::oak::GOAKKernelImpl + +namespace { +GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) { + static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + GOAKContext::InputPtr& in, + const cv::gapi::oak::EncoderConfig& cfg, + GOAKContext::OutputPtr& out) { + auto videoEnc = 
params.pipeline->create(); + + // FIXME: convert all the parameters to dai + videoEnc->setDefaultProfilePreset(cfg.width, cfg.height, + cfg.frameRate, + convertEncProfile(cfg.profile)); + + in = &(videoEnc->input); + out = &(videoEnc->bitstream); + + return videoEnc; + } +}; + +GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { + static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + GOAKContext::InputPtr& in, + const cv::Mat& hk, + const cv::Mat& vk, + GOAKContext::OutputPtr& out) { + auto edgeDetector = params.pipeline->create(); + + edgeDetector->setMaxOutputFrameSize(params.camera_size.width * params.camera_size.height); + + auto xinEdgeCfg = params.pipeline->create(); + xinEdgeCfg->setStreamName("sobel_cfg"); + + auto mat2vec = [&](cv::Mat m) { + std::vector> v(m.rows); + for (int i = 0; i < m.rows; ++i) + { + m.row(i).reshape(1,1).copyTo(v[i]); + } + return v; + }; + + dai::EdgeDetectorConfig cfg; + cfg.setSobelFilterKernels(mat2vec(hk), mat2vec(vk)); + + xinEdgeCfg->out.link(edgeDetector->inputConfig); + + params.m_in_queues.push_back({"sobel_cfg", cfg}); + + in = &(edgeDetector->inputImage); + out = &(edgeDetector->outputImage); + + return edgeDetector; + } +}; +} // anonymous namespace +} // namespace oak +} // namespace gimpl +} // namespace cv + +namespace cv { +namespace gapi { +namespace oak { + +cv::gapi::GKernelPackage kernels() { + return cv::gapi::kernels< cv::gimpl::oak::GOAKEncFrame + , cv::gimpl::oak::GOAKSobelXY + >(); +} + +} // namespace oak +} // namespace gapi +} // namespace cv + +#else + +namespace cv { +namespace gapi { +namespace oak { + +cv::gapi::GKernelPackage kernels(); + +cv::gapi::GKernelPackage kernels() { + GAPI_Assert(false && "Built without OAK support"); + return {}; +} + +} // namespace oak +} // namespace gapi +} // namespace cv + +#endif // HAVE_OAK diff --git a/modules/gapi/src/backends/oak/oak_media_adapter.hpp b/modules/gapi/src/backends/oak/oak_media_adapter.hpp new file mode 100644 index 
0000000000..9c81f5a953 --- /dev/null +++ b/modules/gapi/src/backends/oak/oak_media_adapter.hpp @@ -0,0 +1,35 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP +#define OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP + +#include + +#include + +namespace cv { +namespace gapi { +namespace oak { + +class GAPI_EXPORTS OAKMediaAdapter final : public cv::MediaFrame::IAdapter { +public: + OAKMediaAdapter() = default; + OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer); + cv::GFrameDesc meta() const override; + cv::MediaFrame::View access(cv::MediaFrame::Access) override; + ~OAKMediaAdapter() = default; +private: + cv::Size m_sz; + cv::MediaFormat m_fmt; + std::vector m_buffer; +}; + +} // namespace oak +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_OAK_MEDIA_ADAPTER_HPP diff --git a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp index 457f423f54..4bd2a10ea5 100644 --- a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp +++ b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp @@ -282,6 +282,23 @@ void GOCVBGR::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat) rmat = cv::make_rmat(bgr); break; } + case cv::MediaFormat::GRAY: + { + std::call_once(m_warnFlag, + []() { + GAPI_LOG_WARNING(NULL, "\nOn-the-fly conversion from GRAY to BGR will happen.\n" + "Conversion may cost a lot for images with high resolution.\n" + "To retrieve cv::Mat from GRAY cv::MediaFrame for free, you may use " + "cv::gapi::streaming::Y.\n"); + }); + cv::Mat bgr; + auto view = frame.access(cv::MediaFrame::Access::R); + cv::Mat gray(desc.size, CV_8UC1, view.ptr[0], view.stride[0]); + cv::cvtColor(gray, bgr, cv::COLOR_GRAY2BGR); + rmat = 
cv::make_rmat(bgr); + break; + } + default: cv::util::throw_error( std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::BGR")); @@ -339,6 +356,15 @@ void GOCVY::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat) }); break; } + case cv::MediaFormat::GRAY: + { + rmat = cv::make_rmat(frame, + [](const cv::GFrameDesc& d) { return cv::GMatDesc(CV_8U, 1, d.size); }, + [](const cv::GFrameDesc& d, const cv::MediaFrame::View& v) { + return cv::Mat(d.size, CV_8UC1, v.ptr[0], v.stride[0]); + }); + break; + } default: cv::util::throw_error( std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::Y")); @@ -408,6 +434,12 @@ void GOCVUV::Actor::extractRMat(const cv::MediaFrame& frame, cv::RMat& rmat) }); break; } + case cv::MediaFormat::GRAY: + { + cv::Mat uv(desc.size / 2, CV_8UC2, cv::Scalar::all(127)); + rmat = cv::make_rmat(uv); + break; + } default: cv::util::throw_error( std::logic_error("Unsupported MediaFormat for cv::gapi::streaming::UV")); diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index d15e17ea28..a3a2746acc 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -323,16 +323,40 @@ public: void rewindToStop(std::vector &in_queues, const std::size_t this_id) { - for (auto &&qit : ade::util::indexed(in_queues)) - { - auto id2 = ade::util::index(qit); - auto &q2 = ade::util::value(qit); - if (this_id == id2) continue; + size_t expected_stop_count = std::count_if(in_queues.begin(), in_queues.end(), [] (const Q* ptr) { + return ptr != nullptr; + }); - Cmd cmd; - while (q2 && !cv::util::holds_alternative(cmd)) - q2->pop(cmd); + if (expected_stop_count > 0) { + // NB: it requires to substract own queues id from total waiting queue count + // because it had got stop message before rewind was called + expected_stop_count--; } + GAPI_LOG_DEBUG(nullptr, "id: " << this_id << ", queues count: " << 
in_queues.size() << + ", expected stop msg count: " << expected_stop_count); + size_t got_stop_count = 0; + while(got_stop_count < expected_stop_count) { + for (auto &&qit : ade::util::indexed(in_queues)) { + auto id2 = ade::util::index(qit); + auto &q2 = ade::util::value(qit); + if (this_id == id2) continue; + + GAPI_LOG_DEBUG(nullptr, "drain next id: " << id2 << + ", stop count (" << got_stop_count << "/" << + expected_stop_count << ")"); + bool got_cmd = true; + while (q2 && got_cmd) { + Cmd cmd; + got_cmd = q2->try_pop(cmd); + if (got_cmd && cv::util::holds_alternative(cmd)) { + got_stop_count ++; + GAPI_LOG_DEBUG(nullptr, "got stop from id: " << id2); + break; + } + } + } + } + GAPI_LOG_DEBUG(nullptr, "completed"); } // This method handles a stop sign got from some input diff --git a/modules/gapi/src/logger.hpp b/modules/gapi/src/logger.hpp index cb169bf4be..7ac3c983fc 100644 --- a/modules/gapi/src/logger.hpp +++ b/modules/gapi/src/logger.hpp @@ -14,10 +14,12 @@ # define GAPI_LOG_INFO(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__) # define GAPI_LOG_WARNING(tag, ...) CV_LOG_WARNING(tag, __VA_ARGS__) # define GAPI_LOG_DEBUG(tag, ...) CV_LOG_DEBUG(tag, __VA_ARGS__) +# define GAPI_LOG_FATAL(tag, ...) CV_LOG_FATAL(tag, __VA_ARGS__) #else # define GAPI_LOG_INFO(tag, ...) # define GAPI_LOG_WARNING(tag, ...) # define GAPI_LOG_DEBUG(tag, ...) +# define GAPI_LOG_FATAL(tag, ...) 
#endif // !defined(GAPI_STANDALONE) diff --git a/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp b/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp index 9019289ae4..188f162ffd 100644 --- a/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp +++ b/modules/gapi/src/streaming/gstreamer/gstreamer_media_adapter.cpp @@ -28,13 +28,41 @@ GStreamerMediaAdapter::GStreamerMediaAdapter(const cv::GFrameDesc& frameDesc, GstVideoMeta* videoMeta = gst_buffer_get_video_meta(m_buffer); if (videoMeta != nullptr) { - m_strides = { videoMeta->stride[0], videoMeta->stride[1] }; - m_offsets = { videoMeta->offset[0], videoMeta->offset[1] }; + switch (m_frameDesc.fmt) { + case cv::MediaFormat::NV12: { + m_strides = { videoMeta->stride[0], videoMeta->stride[1] }; + m_offsets = { videoMeta->offset[0], videoMeta->offset[1] }; + break; + } + case cv::MediaFormat::GRAY: { + m_strides = { videoMeta->stride[0]}; + m_offsets = { videoMeta->offset[0]}; + break; + } + default: { + GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here"); + break; + } + } } else { - m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0), - GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 1) }; - m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0), - GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 1) }; + switch (m_frameDesc.fmt) { + case cv::MediaFormat::NV12: { + m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0), + GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 1) }; + m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0), + GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 1) }; + break; + } + case cv::MediaFormat::GRAY: { + m_strides = { GST_VIDEO_INFO_PLANE_STRIDE(m_videoInfo.get(), 0)}; + m_offsets = { GST_VIDEO_INFO_PLANE_OFFSET(m_videoInfo.get(), 0)}; + break; + } + default: { + GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here"); + break; + } + } } } @@ -71,8 +99,10 @@ 
cv::MediaFrame::View GStreamerMediaAdapter::access(cv::MediaFrame::Access access if(!m_isMapped.load(std::memory_order_relaxed)) { - GAPI_Assert(GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 2); - GAPI_Assert(GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_NV12); + GAPI_Assert(GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 2 || + GST_VIDEO_INFO_N_PLANES(m_videoInfo.get()) == 1); + GAPI_Assert(GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_NV12 || + GST_VIDEO_INFO_FORMAT(m_videoInfo.get()) == GST_VIDEO_FORMAT_GRAY8); // TODO: Use RAII for map/unmap if (access == cv::MediaFrame::Access::W) { @@ -85,27 +115,56 @@ cv::MediaFrame::View GStreamerMediaAdapter::access(cv::MediaFrame::Access access } GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 0) == m_strides[0]); - GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 1) == m_strides[1]); GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 0) == m_offsets[0]); - GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 1) == m_offsets[1]); + if (m_frameDesc.fmt == cv::MediaFormat::NV12) { + GAPI_Assert(GST_VIDEO_FRAME_PLANE_STRIDE(&m_videoFrame, 1) == m_strides[1]); + GAPI_Assert(GST_VIDEO_FRAME_PLANE_OFFSET(&m_videoFrame, 1) == m_offsets[1]); + } m_isMapped.store(true, std::memory_order_release); } } - cv::MediaFrame::View::Ptrs ps { - static_cast(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane - static_cast(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[1], // UV-plane - nullptr, - nullptr - }; + cv::MediaFrame::View::Ptrs ps; + cv::MediaFrame::View::Strides ss; + + switch (m_frameDesc.fmt) { + case cv::MediaFormat::NV12: { + ps = { + static_cast(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane + static_cast(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[1], // UV-plane + nullptr, + nullptr + }; + ss = { + static_cast(m_strides[0]), // Y-plane stride + static_cast(m_strides[1]), // UV-plane stride + 0u, + 0u + 
}; + break; + } + case cv::MediaFormat::GRAY: { + ps = { + static_cast(GST_VIDEO_FRAME_PLANE_DATA(&m_videoFrame, 0)) + m_offsets[0], // Y-plane + nullptr, + nullptr, + nullptr + }; + ss = { + static_cast(m_strides[0]), // Y-plane stride + 0u, + 0u, + 0u + }; + break; + } + default: { + GAPI_Assert(false && "Non NV12 or GRAY Media format is not expected here"); + break; + } + } - cv::MediaFrame::View::Strides ss = { - static_cast(m_strides[0]), // Y-plane stride - static_cast(m_strides[1]), // UV-plane stride - 0u, - 0u - }; --thread_counters; return cv::MediaFrame::View(std::move(ps), std::move(ss)); diff --git a/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp b/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp index 661125657c..f1bd438ce2 100644 --- a/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp +++ b/modules/gapi/src/streaming/gstreamer/gstreamersource.cpp @@ -30,8 +30,9 @@ namespace gst { #ifdef HAVE_GSTREAMER -constexpr char NV12_CAPS_STRING[] = - "video/x-raw,format=NV12;video/x-raw(memory:DMABuf),format=NV12"; +constexpr char ALLOWED_CAPS_STRING[] = + "video/x-raw,format=(string){NV12, GRAY8};video/x-raw(memory:DMABuf),format=(string){NV12, GRAY8}"; + namespace { GstPadProbeReturn appsinkQueryCallback(GstPad*, GstPadProbeInfo* info, gpointer) @@ -137,17 +138,17 @@ void GStreamerSource::Priv::configureAppsink() { // Do not emit signals: all calls will be synchronous and blocking. 
gst_app_sink_set_emit_signals(GST_APP_SINK(m_appsink.get()), FALSE); - GStreamerPtr nv12Caps(gst_caps_from_string(NV12_CAPS_STRING)); + GStreamerPtr gstCaps(gst_caps_from_string(ALLOWED_CAPS_STRING)); GStreamerPtr appsinkPad(gst_element_get_static_pad(m_appsink, "sink")); GStreamerPtr peerCaps(gst_pad_peer_query_caps(appsinkPad, NULL)); - if (!gst_caps_can_intersect(peerCaps, nv12Caps)) { + if (!gst_caps_can_intersect(peerCaps, gstCaps)) { cv::util::throw_error( - std::logic_error("appsink element can only consume video-frame in NV12 format in " + std::logic_error("appsink element can only consume video-frame in NV12 or GRAY8 format in " "GStreamerSource")); } - gst_app_sink_set_caps(GST_APP_SINK(m_appsink.get()), nv12Caps); + gst_app_sink_set_caps(GST_APP_SINK(m_appsink.get()), gstCaps); gst_pad_add_probe(appsinkPad, GST_PAD_PROBE_TYPE_QUERY_DOWNSTREAM, appsinkQueryCallback, NULL, NULL); @@ -184,10 +185,29 @@ void GStreamerSource::Priv::prepareVideoMeta() cv::util::throw_error(std::logic_error("Cannot query video width/height.")); } + // Fill GstVideoInfo structure to work further with GstVideoFrame class. + if (!gst_video_info_from_caps(&m_videoInfo, prerollCaps)) { + cv::util::throw_error(std::logic_error("preroll sample has invalid caps.")); + } + m_type = GST_VIDEO_INFO_FORMAT(&m_videoInfo); switch(m_outputType) { case GStreamerSource::OutputType::FRAME: { // Construct metadata for media frame. 
- m_mediaFrameMeta = GFrameDesc { cv::MediaFormat::NV12, cv::Size(width, height) }; + switch (m_type) { + case GST_VIDEO_FORMAT_NV12: { + m_mediaFrameMeta = GFrameDesc{ cv::MediaFormat::NV12, cv::Size(width, height) }; + GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2); + break; + } + case GST_VIDEO_FORMAT_GRAY8: { + m_mediaFrameMeta = GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(width, height) }; + GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 1); + break; + } + default: { + GAPI_Assert(false && "Unsupported GStreamerSource FRAME type."); + } + } break; } case GStreamerSource::OutputType::MAT: { @@ -197,13 +217,6 @@ void GStreamerSource::Priv::prepareVideoMeta() } } - // Fill GstVideoInfo structure to work further with GstVideoFrame class. - if (!gst_video_info_from_caps(&m_videoInfo, prerollCaps)) { - cv::util::throw_error(std::logic_error("preroll sample has invalid caps.")); - } - GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2); - GAPI_Assert(GST_VIDEO_INFO_FORMAT(&m_videoInfo) == GST_VIDEO_FORMAT_NV12); - m_isMetaPrepared = true; } } @@ -272,28 +285,46 @@ bool GStreamerSource::Priv::retrieveFrame(cv::Mat& data) try { - // m_matMeta holds width and height for 8U BGR frame, but actual - // frame m_buffer we request from GStreamer pipeline has 8U NV12 format. - // Constructing y and uv cv::Mat-s from such a m_buffer: - GAPI_Assert((uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 1) == + switch (m_type) { + case GST_VIDEO_FORMAT_NV12: { + // m_matMeta holds width and height for 8U BGR frame, but actual + // frame m_buffer we request from GStreamer pipeline has 8U NV12 format. 
+ // Constructing y and uv cv::Mat-s from such a m_buffer: + GAPI_Assert((uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 1) == (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1)); + GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 2); - cv::Mat y(m_matMeta.size, CV_8UC1, - (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + - GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0), - GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0)); - cv::Mat uv(m_matMeta.size / 2, CV_8UC2, - (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + - GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1), - GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 1)); + cv::Mat y(m_matMeta.size, CV_8UC1, + (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + + GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0), + GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0)); + cv::Mat uv(m_matMeta.size / 2, CV_8UC2, + (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + + GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 1), + GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 1)); - cv::cvtColorTwoPlane(y, uv, data, cv::COLOR_YUV2BGR_NV12); + cv::cvtColorTwoPlane(y, uv, data, cv::COLOR_YUV2BGR_NV12); + break; + } + case GST_VIDEO_FORMAT_GRAY8: { + GAPI_Assert(GST_VIDEO_INFO_N_PLANES(&m_videoInfo) == 1); + cv::Mat y(m_matMeta.size, CV_8UC1, + (uint8_t*)GST_VIDEO_FRAME_PLANE_DATA(&videoFrame, 0) + + GST_VIDEO_FRAME_PLANE_OFFSET(&videoFrame, 0), + GST_VIDEO_FRAME_PLANE_STRIDE(&videoFrame, 0)); + cv::cvtColor(y, data, cv::COLOR_GRAY2BGR); + break; + } + default: { + GAPI_Assert(false && "retrieveFrame - unsupported GStreamerSource FRAME type."); + } + } } catch (...) 
{ gst_video_frame_unmap(&videoFrame); - cv::util::throw_error(std::runtime_error("NV12 buffer conversion to BGR is failed!")); + cv::util::throw_error(std::runtime_error("NV12 or GRAY8 buffer conversion to BGR is failed!")); } gst_video_frame_unmap(&videoFrame); diff --git a/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp b/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp index b0940c48a3..0671213197 100644 --- a/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp +++ b/modules/gapi/src/streaming/gstreamer/gstreamersource_priv.hpp @@ -59,6 +59,7 @@ protected: bool m_isPipelinePlaying = false; int64_t m_frameId = 0L; + size_t m_type = 0; //Gstreamer video format type protected: void configureAppsink(); diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp index 2cdf1c2b44..ad0e5bf667 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp @@ -210,30 +210,29 @@ VPLCPUAccelerationPolicy::create_surface_pool(size_t pool_size, size_t surface_s } VPLCPUAccelerationPolicy::pool_key_t -VPLCPUAccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) { +VPLCPUAccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) { // External (application) allocation of decode surfaces GAPI_LOG_DEBUG(nullptr, "Query mfxFrameAllocRequest.NumFrameSuggested: " << alloc_request.NumFrameSuggested << ", mfxFrameAllocRequest.Type: " << alloc_request.Type); - mfxU32 singleSurfaceSize = utils::GetSurfaceSize_(param.mfx.FrameInfo.FourCC, - param.mfx.FrameInfo.Width, - param.mfx.FrameInfo.Height); + mfxU32 singleSurfaceSize = utils::GetSurfaceSize_(info.FourCC, + info.Width, + info.Height); if (!singleSurfaceSize) { throw std::runtime_error("Cannot determine surface size for: 
fourCC: " + - std::to_string(param.mfx.FrameInfo.FourCC) + - ", width: " + std::to_string(param.mfx.FrameInfo.Width) + - ", height: " + std::to_string(param.mfx.FrameInfo.Height)); + std::to_string(info.FourCC) + + ", width: " + std::to_string(info.Width) + + ", height: " + std::to_string(info.Height)); } - const auto &frameInfo = param.mfx.FrameInfo; auto surface_creator = - [&frameInfo] (std::shared_ptr out_buf_ptr, size_t out_buf_ptr_offset, + [&info] (std::shared_ptr out_buf_ptr, size_t out_buf_ptr_offset, size_t out_buf_size) -> surface_ptr_t { - return (frameInfo.FourCC == MFX_FOURCC_RGB4) ? - utils::create_surface_RGB4_(frameInfo, out_buf_ptr, out_buf_ptr_offset, + return (info.FourCC == MFX_FOURCC_RGB4) ? + utils::create_surface_RGB4_(info, out_buf_ptr, out_buf_ptr_offset, out_buf_size) : - utils::create_surface_other_(frameInfo, out_buf_ptr, out_buf_ptr_offset, + utils::create_surface_other_(info, out_buf_ptr, out_buf_ptr_offset, out_buf_size);}; return create_surface_pool(alloc_request.NumFrameSuggested, diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp index fdc0afd4bf..8a2061dce0 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp @@ -32,7 +32,7 @@ struct GAPI_EXPORTS VPLCPUAccelerationPolicy final : public VPLAccelerationPolic void init(session_t session) override; void deinit(session_t session) override; pool_key_t create_surface_pool(size_t pool_size, size_t surface_size_bytes, surface_ptr_ctr_t creator); - pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) override; + pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) override; surface_weak_ptr_t get_free_surface(pool_key_t key) override; size_t get_free_surface_count(pool_key_t key) const override; size_t 
get_surface_count(pool_key_t key) const override; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp index f528190ad5..02720f3774 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp @@ -98,9 +98,7 @@ void VPLDX11AccelerationPolicy::deinit(session_t session) { VPLDX11AccelerationPolicy::pool_key_t VPLDX11AccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc_req, - mfxVideoParam& param) { - param.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; - + mfxFrameInfo& info) { // allocate textures by explicit request mfxFrameAllocResponse mfxResponse; mfxStatus sts = on_alloc(&alloc_req, &mfxResponse); @@ -120,7 +118,7 @@ VPLDX11AccelerationPolicy::create_surface_pool(const mfxFrameAllocRequest& alloc pool_t pool(numSurfaces); for (int i = 0; i < numSurfaces; i++) { std::unique_ptr handle(new mfxFrameSurface1 {}); - handle->Info = param.mfx.FrameInfo; + handle->Info = info; handle->Data.MemId = mfxResponse.mids[i]; pool.push_back(Surface::create_surface(std::move(handle), table_it->second)); @@ -261,24 +259,54 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques desc.Format = colorFormat; desc.SampleDesc.Count = 1; desc.Usage = D3D11_USAGE_DEFAULT; - desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; + desc.MiscFlags = 0; desc.BindFlags = D3D11_BIND_DECODER; + if ((MFX_MEMTYPE_FROM_VPPIN & request->Type) && (DXGI_FORMAT_YUY2 == desc.Format) || + (DXGI_FORMAT_B8G8R8A8_UNORM == desc.Format) || + (DXGI_FORMAT_R10G10B10A2_UNORM == desc.Format) || + (DXGI_FORMAT_R16G16B16A16_UNORM == desc.Format)) { + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + } + + if ((MFX_MEMTYPE_FROM_VPPOUT & request->Type) || + (MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET & request->Type)) { + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + } + if (request->Type & 
MFX_MEMTYPE_SHARED_RESOURCE) { desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; } - ComPtrGuard main_texture = createCOMPtrGuard(); + if (DXGI_FORMAT_P8 == desc.Format) { + desc.BindFlags = 0; + } + + size_t main_textures_count = 1; + if (D3D11_BIND_RENDER_TARGET & desc.BindFlags) { + GAPI_LOG_DEBUG(nullptr, "Use array of testures instead of texture array"); + desc.ArraySize = 1; + main_textures_count = request->NumFrameSuggested; + } + + // create GPU textures HRESULT err = S_OK; - { - ID3D11Texture2D *pTexture2D = nullptr; - err = hw_handle->CreateTexture2D(&desc, nullptr, &pTexture2D); - if (FAILED(err)) { - GAPI_LOG_WARNING(nullptr, "Cannot create texture, error: " + std::to_string(HRESULT_CODE(err))); - return MFX_ERR_MEMORY_ALLOC; + std::vector> main_textures; + main_textures.reserve(main_textures_count); + for (size_t i = 0; i < main_textures_count; i++) { + ComPtrGuard main_texture = createCOMPtrGuard(); + { + ID3D11Texture2D *pTexture2D = nullptr; + err = hw_handle->CreateTexture2D(&desc, nullptr, &pTexture2D); + if (FAILED(err)) { + GAPI_LOG_WARNING(nullptr, "Cannot create texture by index: " << i << + ", error: " << std::to_string(HRESULT_CODE(err))); + return MFX_ERR_MEMORY_ALLOC; + } + main_texture.reset(pTexture2D); } - main_texture.reset(pTexture2D); + main_textures.push_back(std::move(main_texture)); } // create staging texture to read it from @@ -308,7 +336,7 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques DX11AllocationRecord::create(request->NumFrameSuggested, device_context, allocator, - std::move(main_texture), + std::move(main_textures), std::move(staging_textures))); if (!inserted_it.second) { GAPI_LOG_WARNING(nullptr, "Cannot assign allocation by id: " + std::to_string(request->AllocId) + @@ -363,7 +391,7 @@ mfxStatus VPLDX11AccelerationPolicy::on_get_hdl(mfxMemId mid, mfxHDL *handle) { pPair->second = static_cast(reinterpret_cast( 
static_cast(data->get_subresource()))); - GAPI_LOG_DEBUG(nullptr, "texture : " << pPair->first << ", sub id: " << pPair->second); + GAPI_LOG_DEBUG(nullptr, "ID3D11Texture2D : " << pPair->first << ", sub id: " << pPair->second); return MFX_ERR_NONE; } diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp index e053089587..893698eb36 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp @@ -43,7 +43,7 @@ struct GAPI_EXPORTS VPLDX11AccelerationPolicy final: public VPLAccelerationPolic void init(session_t session) override; void deinit(session_t session) override; pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, - mfxVideoParam& param) override; + mfxFrameInfo& info) override; surface_weak_ptr_t get_free_surface(pool_key_t key) override; size_t get_free_surface_count(pool_key_t key) const override; size_t get_surface_count(pool_key_t key) const override; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp index a9059c29ef..b1d7c25bb1 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp @@ -54,7 +54,7 @@ struct VPLAccelerationPolicy // for existing workspace in existing pool (see realloc) // thus it is not implemented, // PLEASE provide initial memory area large enough - virtual pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxVideoParam& param) = 0; + virtual pool_key_t create_surface_pool(const mfxFrameAllocRequest& alloc_request, mfxFrameInfo& info) = 0; virtual surface_weak_ptr_t get_free_surface(pool_key_t key) = 0; virtual size_t get_free_surface_count(pool_key_t key) const = 0; diff --git 
a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp index 3bbfb25b0a..574860e03d 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.cpp @@ -96,6 +96,7 @@ void LockAdapter::unlock_write(mfxMemId mid, mfxFrameData &data) { SharedLock* LockAdapter::set_adaptee(SharedLock* new_impl) { SharedLock* old_impl = impl; + GAPI_DbgAssert(old_impl == nullptr || new_impl == nullptr && "Must not be previous impl"); impl = new_impl; return old_impl; } @@ -355,13 +356,14 @@ DX11AllocationRecord::~DX11AllocationRecord() { GAPI_LOG_DEBUG(nullptr, "release final referenced texture: " << texture_ptr.get()); } -void DX11AllocationRecord::init(unsigned int items, - ID3D11DeviceContext* origin_ctx, +void DX11AllocationRecord::init(unsigned int items, ID3D11DeviceContext* origin_ctx, mfxFrameAllocator origin_allocator, - ComPtrGuard&& texture, + std::vector> &&textures, std::vector> &&staging_textures) { + GAPI_DbgAssert(items != 0 && "Cannot create DX11AllocationRecord with empty items"); GAPI_DbgAssert(items == staging_textures.size() && "Allocation items count and staging size are not equal"); + GAPI_DbgAssert(textures.size() != 1 ? 
items == textures.size() : true && "Allocation items count and staging size are not equal"); GAPI_DbgAssert(origin_ctx && "Cannot create DX11AllocationItem for empty origin_ctx"); auto shared_allocator_copy = origin_allocator; @@ -374,13 +376,22 @@ void DX11AllocationRecord::init(unsigned int items, shared_allocator_copy.pthis = nullptr; - GAPI_LOG_DEBUG(nullptr, "subresources count: " << items << ", text: " << texture.get()); + GAPI_LOG_DEBUG(nullptr, "subresources count: " << items); resources.reserve(items); - // no AddRef here, because DX11AllocationRecord receive ownership it here - texture_ptr = createCOMSharedPtrGuard(std::move(texture)); + + if (textures.size() == 1) { + texture_ptr = createCOMSharedPtrGuard(std::move(textures[0])); + } for(unsigned int i = 0; i < items; i++) { - resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy, - texture_ptr, i, std::move(staging_textures[i]))); + if (textures.size() == 1) { + GAPI_LOG_DEBUG(nullptr, "subresources: [" << i <<", " << items << "], ID3D11Texture2D: " << texture_ptr.get()); + resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy, + texture_ptr, i, std::move(staging_textures[i]))); + } else { + GAPI_LOG_DEBUG(nullptr, "subresources: [" << i <<", " << items << "], ID3D11Texture2D: " << textures[i].get()); + resources.emplace_back(new DX11AllocationItem(get_ptr(), origin_ctx, shared_allocator_copy, + std::move(textures[i]), 0, std::move(staging_textures[i]))); + } } } diff --git a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp index 46ddff86a4..c68a08a3f8 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/dx11_alloc_resource.hpp @@ -133,10 +133,10 @@ struct DX11AllocationRecord : public std::enable_shared_from_this&& texture, std::vector> 
&&staging_textures); - + std::vector>&& textures, std::vector> &&staging_textures); std::vector resources; ComSharedPtrGuard texture_ptr; }; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp index 04cf10c8d7..6afa2cf0b6 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp @@ -48,7 +48,8 @@ VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr surf Surface::data_t& data = parent_surface_ptr->get_data(); GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << ", w: " << info.Width << ", h: " << info.Height << - ", p: " << data.Pitch); + ", p: " << data.Pitch << + ", frame id: " << reinterpret_cast(this)); switch(info.FourCC) { case MFX_FOURCC_I420: @@ -72,6 +73,9 @@ VPLMediaFrameDX11Adapter::~VPLMediaFrameDX11Adapter() { // Each VPLMediaFrameDX11Adapter releases mfx surface counter // The last VPLMediaFrameDX11Adapter releases shared Surface pointer // The last surface pointer releases workspace memory + + GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast(this)); + Surface::data_t& data = parent_surface_ptr->get_data(); LockAdapter* alloc_data = reinterpret_cast(data.MemId); alloc_data->set_adaptee(nullptr); @@ -155,30 +159,44 @@ MediaFrame::View VPLMediaFrameDX11Adapter::access(MediaFrame::Access mode) { } cv::util::any VPLMediaFrameDX11Adapter::blobParams() const { + /*GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not fully integrated" + "in OpenVINO InferenceEngine and would be temporary disable.");*/ #ifdef HAVE_INF_ENGINE - GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not fully operable " - "in G-API streaming. 
Please waiting for future PRs"); - Surface::data_t& data = parent_surface_ptr->get_data(); + const Surface::info_t& info = parent_surface_ptr->get_info(); NativeHandleAdapter* native_handle_getter = reinterpret_cast(data.MemId); mfxHDLPair handle{}; native_handle_getter->get_handle(data.MemId, reinterpret_cast(handle)); - InferenceEngine::ParamMap params{{"SHARED_MEM_TYPE", "VA_SURFACE"}, - {"DEV_OBJECT_HANDLE", handle.first}, - {"COLOR_FORMAT", InferenceEngine::ColorFormat::NV12}, - {"VA_PLANE", + GAPI_Assert(frame_desc.fmt == MediaFormat::NV12 && + "blobParams() for VPLMediaFrameDX11Adapter supports NV12 only"); + + InferenceEngine::ParamMap y_params{{"SHARED_MEM_TYPE", "VA_SURFACE"}, + {"DEV_OBJECT_HANDLE", handle.first}, + {"COLOR_FORMAT", InferenceEngine::ColorFormat::NV12}, + {"VA_PLANE", static_cast( reinterpret_cast( reinterpret_cast( handle.second)))}};//, - const Surface::info_t& info = parent_surface_ptr->get_info(); - InferenceEngine::TensorDesc tdesc({InferenceEngine::Precision::U8, - {1, 3, static_cast(info.Height), - static_cast(info.Width)}, - InferenceEngine::Layout::NCHW}); - return std::make_pair(tdesc, params); + InferenceEngine::TensorDesc y_tdesc({InferenceEngine::Precision::U8, + {1, 1, static_cast(info.Height), + static_cast(info.Width)}, + InferenceEngine::Layout::NHWC}); + + InferenceEngine::ParamMap uv_params = y_params; + uv_params["MEM_HANDLE"] = handle.first; + uv_params["VA_PLANE"] = static_cast( + reinterpret_cast( + reinterpret_cast( + handle.second))) + 1; + InferenceEngine::TensorDesc uv_tdesc({InferenceEngine::Precision::U8, + {1, 2, static_cast(info.Height) / 2, + static_cast(info.Width) / 2}, + InferenceEngine::Layout::NHWC}); + return std::make_pair(std::make_pair(y_tdesc, y_params), + std::make_pair(uv_tdesc, uv_params)); #else GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not implemented"); #endif // HAVE_INF_ENGINE diff --git a/modules/gapi/src/streaming/onevpl/cfg_params.cpp 
b/modules/gapi/src/streaming/onevpl/cfg_params.cpp index 599f751358..b13f9cadb1 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_params.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_params.cpp @@ -118,6 +118,82 @@ CfgParam CfgParam::create_implementation(const char* value) { return CfgParam::create(CfgParam::implementation_name(), std::string(value)); } +CfgParam CfgParam::create_vpp_frames_pool_size(size_t value) { + // NB: cast to uint64_t because CfgParam inner variant works over + // uint64_t instead of size_t and mirrored VPL types variety + // but size_t looks more friendly for C++ high-level development + return CfgParam::create(CfgParam::vpp_frames_pool_size_name(), + static_cast(value), false); +} + +CfgParam CfgParam::create_vpp_in_width(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_width_name(), value, false); +} + +CfgParam CfgParam::create_vpp_in_height(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_height_name(), value, false); +} + +CfgParam CfgParam::create_vpp_in_crop_x(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_crop_x_name(), value, false); +} + +CfgParam CfgParam::create_vpp_in_crop_y(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_crop_y_name(), value, false); +} + +CfgParam CfgParam::create_vpp_in_crop_w(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_crop_w_name(), value, false); +} + +CfgParam CfgParam::create_vpp_in_crop_h(uint16_t value) { + return CfgParam::create(CfgParam::vpp_in_crop_h_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_fourcc(uint32_t value) { + return CfgParam::create(CfgParam::vpp_out_fourcc_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_chroma_format(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_chroma_format_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_width(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_width_name(), value, false); +} + +CfgParam 
CfgParam::create_vpp_out_height(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_height_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_crop_x(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_crop_x_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_crop_y(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_crop_y_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_crop_w(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_crop_w_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_crop_h(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_crop_h_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_pic_struct(uint16_t value) { + return CfgParam::create(CfgParam::vpp_out_pic_struct_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_framerate_n(uint32_t value) { + return CfgParam::create(CfgParam::vpp_out_framerate_n_name(), value, false); +} + +CfgParam CfgParam::create_vpp_out_framerate_d(uint32_t value) { + return CfgParam::create(CfgParam::vpp_out_framerate_d_name(), value, false); +} + CfgParam& CfgParam::operator=(const CfgParam& src) { if (this != &src) { m_priv = src.m_priv; diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp index 07c639faa2..d748825b1c 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp @@ -35,6 +35,12 @@ struct ParamCreator { return create_impl(name, value); } private: + mfxVariant create_impl(const std::string&, mfxU16 value) { + mfxVariant ret; + ret.Type = MFX_VARIANT_TYPE_U16; + ret.Data.U16 = value; + return ret; + } mfxVariant create_impl(const std::string&, mfxU32 value) { mfxVariant ret; ret.Type = MFX_VARIANT_TYPE_U32; @@ -53,6 +59,10 @@ private: ret.Data.U64 = value; return ret; } + mfxVariant create_impl(const std::string&, const std::string&) { + 
GAPI_Assert(false && "Something wrong: you should not create mfxVariant " + "from string directly - native type is lost in this case"); + } }; template @@ -86,6 +96,76 @@ std::vector get_params_from_string(const std::string& str) { ret.push_back(creator.create(name, cstr_to_mfx_version(value.c_str()))); } else if (name == CfgParam::frames_pool_size_name()) { ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); + } else if (name == CfgParam::vpp_frames_pool_size_name()) { + ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); + } else if (name == CfgParam::vpp_in_width_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_in_height_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_in_crop_w_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_in_crop_h_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_in_crop_x_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_in_crop_y_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_fourcc_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_chroma_format_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_width_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == 
CfgParam::vpp_out_height_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_crop_w_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_crop_h_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_crop_x_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_crop_y_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_pic_struct_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_framerate_n_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); + } else if (name == CfgParam::vpp_out_framerate_d_name()) { + ret.push_back(creator.create(name, + static_cast(strtoul_or_throw(value.c_str())), + false)); } else { GAPI_LOG_DEBUG(nullptr, "Cannot parse configuration param, name: " << name << ", value: " << value); @@ -128,6 +208,45 @@ mfxVariant cfg_param_to_mfx_variant(const CfgParam& cfg_val) { return ret; } +void extract_optional_param_by_name(const std::string &name, + const std::vector &in_params, + cv::util::optional &out_param) { + auto it = std::find_if(in_params.begin(), in_params.end(), [&name] (const CfgParam& value) { + return value.get_name() == name; + }); + if (it != in_params.end()) { + cv::util::visit(cv::util::overload_lambdas( + [&out_param](uint8_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](int8_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](uint16_t value) { out_param = 
cv::util::make_optional(static_cast(value)); }, + [&out_param](int16_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](uint32_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](int32_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](uint64_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](int64_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](float_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](double_t value) { out_param = cv::util::make_optional(static_cast(value)); }, + [&out_param](void*) { GAPI_Assert(false && "`void*` is unsupported type"); }, + [&out_param](const std::string& value) { + out_param = cv::util::make_optional(strtoull_or_throw(value.c_str())); + }), + it->get_value()); + } +} + +unsigned long strtoul_or_throw(const char* str) { + char *end_ptr = nullptr; + errno = 0; + unsigned long ret = strtoul(str, &end_ptr, 10); + if ((end_ptr == str) || + ((ret == ULONG_MAX || ret == LONG_MIN) && errno == ERANGE)) { + // nothing parsed from the string, handle errors or exit + GAPI_LOG_WARNING(nullptr, "strtoul failed for: " << str); + GAPI_Assert(false && "strtoul_or_throw"); + } + return ret; +} + size_t strtoull_or_throw(const char* str) { char *end_ptr = nullptr; errno = 0; diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp index c5e7685756..e01d5c412a 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp +++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.hpp @@ -31,6 +31,11 @@ struct ParamCreator { mfxVariant cfg_param_to_mfx_variant(const CfgParam& value); +void extract_optional_param_by_name(const std::string &name, + const std::vector& in_params, + cv::util::optional &out_param); + +unsigned long strtoul_or_throw(const char* str); size_t 
strtoull_or_throw(const char* str); int64_t strtoll_or_throw(const char* str); diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp index 6707a401b1..d8af94f939 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp @@ -26,6 +26,31 @@ namespace gapi { namespace wip { namespace onevpl { +void VPLLegacyDecodeEngine::try_modify_pool_size_request_param(const char* param_name, + size_t new_frames_count, + mfxFrameAllocRequest& request) { + if (new_frames_count < request.NumFrameMin) { + GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << param_name << "\": " << + new_frames_count << ". It must be equal or greater than " + "mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin); + throw std::runtime_error(std::string("Invalid value of param: ") + + param_name + ", underflow"); + } else { + if (static_cast(std::numeric_limits::max()) < new_frames_count) { + GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << param_name << "\": " << + new_frames_count << ". 
It must not be greater than " << + std::numeric_limits::max()); + throw std::runtime_error(std::string("Invalid value of param: ") + + param_name + ", overflow"); + } + request.NumFrameSuggested = static_cast(new_frames_count); + GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input: " << + ", mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin << + ", mfxFrameAllocRequest.NumFrameSuggested: " << request.NumFrameSuggested << + ", mfxFrameAllocRequest.Type: " << request.Type); + } +} + VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr&& accel) : ProcessingEngineBase(std::move(accel)) { @@ -138,11 +163,12 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr& cfg_params, - std::shared_ptr provider) { - GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr"); +VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param( + mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) { + + GAPI_DbgAssert(provider && "Cannot create decoder, data provider is nullptr"); // init session acceleration_policy->init(mfx_session); @@ -206,78 +232,55 @@ VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session, ", mfxFrameAllocRequest.Type: " << decRequest.Type); // NB: override NumFrameSuggested preallocation size (how many frames we can hold) - size_t preallocated_frames_count = decRequest.NumFrameSuggested; - // NB: if you see bunch of WARNING about "cannot get free surface from pool" - // and have abundant RAM size then increase `preallocated_frames_count` + // if you see bunch of WARNING about "cannot get free surface from pool" + // and have abundant RAM size then increase `CfgParam::frames_pool_size_name()` // to keep more free surfaces in a round. Otherwise VPL decode pipeline will be waiting // till application is freeing unusable surface on its side. 
// - auto queue_capacity_it = std::find_if(cfg_params.begin(), cfg_params.end(), [] (const CfgParam& value) { - return value.get_name() == CfgParam::frames_pool_size_name(); - }); - if (queue_capacity_it != cfg_params.end()) { - cv::util::visit(cv::util::overload_lambdas( - [&preallocated_frames_count](uint8_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](int8_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](uint16_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](int16_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](uint32_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](int32_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](uint64_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](int64_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](float_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](double_t value) { preallocated_frames_count = static_cast(value); }, - [&preallocated_frames_count](void*) { GAPI_Assert(false && "`void*` is unsupported type"); }, - [&preallocated_frames_count](const std::string& value) { - preallocated_frames_count = strtoull_or_throw(value.c_str()); - }), - queue_capacity_it->get_value()); - + cv::optional preallocated_frames_count_cfg; + extract_optional_param_by_name(CfgParam::frames_pool_size_name(), + cfg_params, + preallocated_frames_count_cfg); + if (preallocated_frames_count_cfg.has_value()) { GAPI_LOG_INFO(nullptr, "Try to use CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " << - preallocated_frames_count << ", for session: " << mfx_session); + preallocated_frames_count_cfg.value() << ", for session: " << mfx_session); + 
try_modify_pool_size_request_param(CfgParam::frames_pool_size_name(), + preallocated_frames_count_cfg.value(), + decRequest); } - if (preallocated_frames_count < decRequest.NumFrameMin) { - GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " << - preallocated_frames_count << ". It must be equal or greater than " - "mfxFrameAllocRequest.NumFrameMin: " << decRequest.NumFrameMin); - throw std::runtime_error(std::string("Invalid value of param: ") + - CfgParam::frames_pool_size_name() + ", underflow"); - } else { - if (static_cast(std::numeric_limits::max()) < preallocated_frames_count) { - GAPI_LOG_WARNING(nullptr, "Cannot proceed with CfgParam \"" << CfgParam::frames_pool_size_name() << "\": " << - preallocated_frames_count << ". It must not be equal than " << - std::numeric_limits::max()); - throw std::runtime_error(std::string("Invalid value of param: ") + - CfgParam::frames_pool_size_name() + ", overflow"); - } - decRequest.NumFrameSuggested = static_cast(preallocated_frames_count); - GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input for session: " << mfx_session << - ", mfxFrameAllocRequest.NumFrameMin: " << decRequest.NumFrameMin << - ", mfxFrameAllocRequest.NumFrameSuggested: " << decRequest.NumFrameSuggested << - ", mfxFrameAllocRequest.Type: " << decRequest.Type); - } + decRequest.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN; VPLAccelerationPolicy::pool_key_t decode_pool_key = - acceleration_policy->create_surface_pool(decRequest, mfxDecParams); + acceleration_policy->create_surface_pool(decRequest, mfxDecParams.mfx.FrameInfo); // Input parameters finished, now initialize decode // create decoder for session accoring to header recovered from source file + sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); if (MFX_ERR_NONE != sts) { throw std::runtime_error("Error initializing Decode, error: " + mfxstatus_to_string(sts)); } - DecoderParams 
decoder_param {bitstream, mfxDecParams}; + return {decode_pool_key, {bitstream, mfxDecParams, preallocated_frames_count_cfg}}; +} + + +ProcessingEngineBase::session_ptr +VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) { + + SessionParam param = prepare_session_param(mfx_session, cfg_params, provider); // create session std::shared_ptr sess_ptr = register_session(mfx_session, - std::move(decoder_param), + std::move(param.decoder_params), provider); - sess_ptr->init_surface_pool(decode_pool_key); + sess_ptr->init_surface_pool(param.decode_pool_key); // prepare working decode surface sess_ptr->swap_surface(*this); return sess_ptr; diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp index f6a02db3db..1b7bee6a82 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp @@ -24,20 +24,31 @@ struct DecoderParams; struct IDataProvider; struct VPLAccelerationPolicy; -class VPLLegacyDecodeEngine : public ProcessingEngineBase { +class GAPI_EXPORTS VPLLegacyDecodeEngine : public ProcessingEngineBase { public: VPLLegacyDecodeEngine(std::unique_ptr&& accel); - session_ptr initialize_session(mfxSession mfx_session, - const std::vector& cfg_params, - std::shared_ptr provider) override; + virtual session_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) override; +protected: + struct SessionParam { + void* decode_pool_key; + DecoderParams decoder_params; + }; + + SessionParam prepare_session_param(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider); -private: ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; ExecutionStatus process_error(mfxStatus status, LegacyDecodeSession& 
sess); void on_frame_ready(LegacyDecodeSession& sess, mfxFrameSurface1* ready_surface); + static void try_modify_pool_size_request_param(const char* param_name, + size_t new_frames_count, + mfxFrameAllocRequest& request); }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp index bbb1378767..56e51ffd9f 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp @@ -11,7 +11,6 @@ #include "streaming/onevpl/engine/decode/decode_session.hpp" #include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp" -#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #include "streaming/onevpl/accelerators/surface/surface.hpp" #include "streaming/onevpl/utils.hpp" @@ -75,8 +74,8 @@ Data::Meta LegacyDecodeSession::generate_frame_meta() { return meta; } -const mfxVideoParam& LegacyDecodeSession::get_video_param() const { - return mfx_decoder_param; +const mfxFrameInfo& LegacyDecodeSession::get_video_param() const { + return mfx_decoder_param.mfx.FrameInfo; } } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp index 476a575172..356f9851cd 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp @@ -26,9 +26,10 @@ struct IDataProvider; class Surface; struct VPLAccelerationPolicy; -class LegacyDecodeSession : public EngineSession { +class GAPI_EXPORTS LegacyDecodeSession : public EngineSession { public: friend class VPLLegacyDecodeEngine; + friend class VPLLegacyTranscodeEngine; //TODO: remove friend add method LegacyDecodeSession(mfxSession sess, DecoderParams&& decoder_param, std::shared_ptr provider); 
~LegacyDecodeSession(); @@ -38,15 +39,15 @@ public: void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); Data::Meta generate_frame_meta(); - const mfxVideoParam& get_video_param() const override; + virtual const mfxFrameInfo& get_video_param() const override; private: mfxVideoParam mfx_decoder_param; std::shared_ptr data_provider; VPLAccelerationPolicy::pool_key_t decoder_pool_id; mfxFrameAllocRequest request; +protected: std::weak_ptr procesing_surface_ptr; - using op_handle_t = std::pair; std::queue sync_queue; diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp index 67018d0fd7..8a1f4383eb 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp @@ -29,6 +29,11 @@ namespace onevpl { struct GAPI_EXPORTS DecoderParams { std::shared_ptr stream; mfxVideoParam param; + cv::optional preallocated_frames_count; +}; + +struct GAPI_EXPORTS TranscoderParams { + mfxVideoParam param; }; struct GAPI_EXPORTS EngineSession { @@ -41,7 +46,7 @@ struct GAPI_EXPORTS EngineSession { std::string error_code_to_str() const; virtual ~EngineSession(); - virtual const mfxVideoParam& get_video_param() const = 0; + virtual const mfxFrameInfo& get_video_param() const = 0; }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp index 72f2f62fc4..35cd664219 100644 --- a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.cpp @@ -36,7 +36,7 @@ ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::process(mfxSession s session_ptr processing_session = sess_it->second; ExecutionData& exec_data = execution_table[session]; - GAPI_LOG_DEBUG(nullptr, "[" << session <<"] start op id: " << 
exec_data.op_id); + GAPI_LOG_DEBUG(nullptr, "[" << session << "] start op id: " << exec_data.op_id); ExecutionStatus status = execute_op(pipeline.at(exec_data.op_id), *processing_session); size_t old_op_id = exec_data.op_id++; if (exec_data.op_id == pipeline.size()) @@ -44,10 +44,10 @@ ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::process(mfxSession s exec_data.op_id = 0; } cv::util::suppress_unused_warning(old_op_id); - GAPI_LOG_DEBUG(nullptr, "[" << session <<"] finish op id: " << old_op_id << - ", " << processing_session->error_code_to_str() << - ", " << ProcessingEngineBase::status_to_string(status) << - ", next op id: " << exec_data.op_id); + GAPI_LOG_DEBUG(nullptr, "[" << session << "] finish op id: " << old_op_id << + ", " << processing_session->error_code_to_str() << + ", " << ProcessingEngineBase::status_to_string(status) << + ", next op id: " << exec_data.op_id); if (status == ExecutionStatus::Failed) { @@ -81,7 +81,7 @@ const char* ProcessingEngineBase::status_to_string(ExecutionStatus status) ProcessingEngineBase::ExecutionStatus ProcessingEngineBase::execute_op(operation_t& op, EngineSession& sess) { - return op(sess); + return op(sess); } size_t ProcessingEngineBase::get_ready_frames_count() const diff --git a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp index 059ef963de..cacc8bd748 100644 --- a/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/processing_engine_base.hpp @@ -67,16 +67,25 @@ protected: std::vector pipeline; std::unique_ptr acceleration_policy; - +public: virtual ExecutionStatus execute_op(operation_t& op, EngineSession& sess); template void create_pipeline(Ops&&...ops) { - GAPI_DbgAssert(pipeline.empty() && "Pipeline must be empty"); std::vector({std::forward(ops)...}).swap(pipeline); } + template + void inject_pipeline_operations(size_t in_position, Ops&&...ops) + { 
+ GAPI_Assert(pipeline.size() >= in_position && + "Invalid position to inject pipeline operation"); + auto it = pipeline.begin(); + std::advance(it, in_position); + pipeline.insert(it, {std::forward(ops)...}); + } + template std::shared_ptr register_session(mfxSession key, SessionArgs&& ...args) diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp new file mode 100644 index 0000000000..36d36d5ec0 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp @@ -0,0 +1,477 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include +#include "streaming/onevpl/data_provider_defines.hpp" + +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" +#include "streaming/onevpl/engine/transcode/transcode_session.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" +#include "streaming/onevpl/utils.hpp" +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +template +bool set_vpp_param(const char* name, Type& out_vpp_param, + const std::map ¶ms_storage, + mfxSession session); + +template<> +bool set_vpp_param(const char* name, uint32_t& out_vpp_param, + const std::map ¶ms_storage, + mfxSession session) { + auto it = params_storage.find(name); + if (it != params_storage.end()) { + auto value = it->second.Data.U32; + GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << + "\": " << value); + out_vpp_param = value; + return true; + } 
+ return false; +} + +template<> +bool set_vpp_param(const char* name, uint16_t& out_vpp_param, + const std::map ¶ms_storage, + mfxSession session) { + auto it = params_storage.find(name); + if (it != params_storage.end()) { + auto value = it->second.Data.U16; + GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << + "\": " << value); + out_vpp_param = value; + return true; + } + return false; +} + +std::map + VPLLegacyTranscodeEngine::get_vpp_params(const std::vector &cfg_params) { + std::map ret; + static const char* vpp_param_prefix {"vpp."}; + for (const auto ¶m : cfg_params) { + const char *param_name_cptr = param.get_name().c_str(); + if (strstr(param_name_cptr, vpp_param_prefix) == param_name_cptr) { + ret.emplace(param.get_name(), cfg_param_to_mfx_variant(param)); + } + } + GAPI_LOG_INFO(nullptr, "Detected VPP params count: [" << ret.size() << + "/" << cfg_params.size() << "]"); + return ret; +} + +VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr&& accel) + : VPLLegacyDecodeEngine(std::move(accel)) { + + GAPI_LOG_INFO(nullptr, "Create Legacy Transcode Engine"); + //inject_pipeline_operations(2, + create_pipeline( + // 1) Read File + [this] (EngineSession& sess) -> ExecutionStatus + { + LegacyTranscodeSession &my_sess = static_cast(sess); + if (!my_sess.data_provider) { + my_sess.last_status = MFX_ERR_MORE_DATA; + return ExecutionStatus::Continue; + } + + my_sess.last_status = MFX_ERR_NONE; + if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) { + my_sess.last_status = MFX_ERR_MORE_DATA; + my_sess.data_provider.reset(); //close source + } + return ExecutionStatus::Continue; + }, + // 2) enqueue ASYNC decode operation + [this] (EngineSession& sess) -> ExecutionStatus + { + LegacyTranscodeSession &my_sess = static_cast(sess); + + // prepare sync object for new surface + LegacyTranscodeSession::op_handle_t sync_pair{}; + + // enqueue decode operation with current session surface + my_sess.last_status = + 
MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength)) + ? my_sess.stream.get() + + : nullptr, /* No more data to read, start decode draining mode*/ + my_sess.procesing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + GAPI_LOG_DEBUG(nullptr, "START decode: " << + ", sync id: " << + sync_pair.first << + ", dec in surface: " << + my_sess.procesing_surface_ptr.lock()->get_handle() << + ", dec out surface: " << sync_pair.second << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + // process wait-like statuses in-place: + // It had better to use up all VPL decoding resources in pipeline + // as soon as possible. So waiting more free-surface or device free + while (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_WRN_DEVICE_BUSY) { + try { + if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { + my_sess.swap_surface(*this); + } + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.stream.get(), + my_sess.procesing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + } catch (const std::runtime_error& ex) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. 
+ // But print WARNING to notify user about pipeline stuck + GAPI_LOG_WARNING(nullptr, "[" << my_sess.session << + "] has no surface, reason: " << + ex.what()); + break; + } + } + + if (my_sess.last_status == MFX_ERR_NONE) { + my_sess.sync_queue.emplace(sync_pair); + } else if (my_sess.last_status != MFX_ERR_MORE_DATA) /* suppress MFX_ERR_MORE_DATA warning */ { + GAPI_LOG_WARNING(nullptr, "decode pending ops count: " << + my_sess.sync_queue.size() << + ", sync id: " << sync_pair.first << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + } + return ExecutionStatus::Continue; + }, + // 3) transcode + [this] (EngineSession& sess) -> ExecutionStatus + { + LegacyTranscodeSession &my_sess = static_cast(sess); + + LegacyDecodeSession::op_handle_t last_op {}; + while (!my_sess.sync_queue.empty()) { + do { + if (!my_sess.vpp_surface_ptr.expired()) { + LegacyDecodeSession::op_handle_t pending_op = my_sess.sync_queue.front(); + GAPI_LOG_DEBUG(nullptr, "pending DEC ops count: " << + my_sess.sync_queue.size() << + ", sync id: " << + pending_op.first << + ", surface: " << + pending_op.second << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + my_sess.sync_queue.pop(); + auto *dec_surface = pending_op.second; + auto *vpp_suface = my_sess.vpp_surface_ptr.lock()->get_handle(); + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, + dec_surface, + vpp_suface, + nullptr, &pending_op.first); + pending_op.second = vpp_suface; + + GAPI_LOG_DEBUG(nullptr, "START transcode ops count: " << + my_sess.vpp_queue.size() << + ", sync id: " << + pending_op.first << + ", dec surface: " << + dec_surface << + ", trans surface: " << pending_op.second << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + if (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_ERR_NONE) { + pending_op.second->Data.Locked++; // TODO -S- workaround + my_sess.vpp_queue.emplace(pending_op); + } + } + + try { + 
my_sess.swap_transcode_surface(*this); + } catch (const std::runtime_error& ex) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. + // But print WARNING to notify user about pipeline stuck + GAPI_LOG_WARNING(nullptr, "[" << my_sess.session << + "] has no VPP surface, reason: " << + ex.what()); + my_sess.vpp_surface_ptr.reset(); + break; + } + } while(my_sess.last_status == MFX_ERR_MORE_SURFACE); + + if (my_sess.vpp_surface_ptr.expired()) { + // TODO break main loop + break; + } + } + return ExecutionStatus::Continue; + }, + // 4) Wait for ASYNC decode result + [this] (EngineSession& sess) -> ExecutionStatus + { + LegacyTranscodeSession& my_sess = static_cast(sess); + do { + if (!my_sess.vpp_queue.empty()) { // FIFO: check the oldest async operation complete + LegacyDecodeSession::op_handle_t& pending_op = my_sess.vpp_queue.front(); + sess.last_status = MFXVideoCORE_SyncOperation(sess.session, pending_op.first, 0); + + GAPI_LOG_DEBUG(nullptr, "pending VPP ops count: " << + my_sess.vpp_queue.size() << + ", sync id: " << + pending_op.first << + ", surface: " << + pending_op.second << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + // put frames in ready queue on success + if (MFX_ERR_NONE == sess.last_status) { + on_frame_ready(my_sess, pending_op.second); + } + } + } while (MFX_ERR_NONE == sess.last_status && !my_sess.vpp_queue.empty()); + return ExecutionStatus::Continue; + }, + // 5) Falls back on generic status procesing + [this] (EngineSession& sess) -> ExecutionStatus + { + return this->process_error(sess.last_status, static_cast(sess)); + } + ); +} + +ProcessingEngineBase::session_ptr +VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) { + // NB: obtain decoder params + VPLLegacyDecodeEngine::SessionParam decode_params = + prepare_session_param(mfx_session, cfg_params, provider); + + + // NB: create transcode params + 
const auto& mfxDecParams = decode_params.decoder_params.param; + + // NB: create transcode params: Out = In by default, In = initially decoded + mfxVideoParam mfxVPPParams{0}; + mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo; + mfxVPPParams.vpp.Out = mfxVPPParams.vpp.In; + + std::map cfg_vpp_params = + VPLLegacyTranscodeEngine::get_vpp_params(cfg_params); + + // override some in-params + if (set_vpp_param(CfgParam::vpp_in_width_name(), mfxVPPParams.vpp.In.Width, + cfg_vpp_params, mfx_session)) { + mfxVPPParams.vpp.In.Width = ALIGN16(mfxVPPParams.vpp.In.Width); + } + if (set_vpp_param(CfgParam::vpp_in_height_name(), mfxVPPParams.vpp.In.Height, + cfg_vpp_params, mfx_session)) { + mfxVPPParams.vpp.In.Height = ALIGN16(mfxVPPParams.vpp.In.Height); + } + set_vpp_param(CfgParam::vpp_in_crop_x_name(), mfxVPPParams.vpp.In.CropX, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_in_crop_y_name(), mfxVPPParams.vpp.In.CropY, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_in_crop_w_name(), mfxVPPParams.vpp.In.CropW, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_in_crop_h_name(), mfxVPPParams.vpp.In.CropH, + cfg_vpp_params, mfx_session); + + // override out params + set_vpp_param(CfgParam::vpp_out_fourcc_name(), mfxVPPParams.vpp.Out.FourCC, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_chroma_format_name(), mfxVPPParams.vpp.Out.ChromaFormat, + cfg_vpp_params, mfx_session); + if (set_vpp_param(CfgParam::vpp_out_width_name(), mfxVPPParams.vpp.Out.Width, + cfg_vpp_params, mfx_session)) { + mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width); + } + if (set_vpp_param(CfgParam::vpp_out_height_name(), mfxVPPParams.vpp.Out.Height, + cfg_vpp_params, mfx_session)) { + mfxVPPParams.vpp.Out.Height = ALIGN16(mfxVPPParams.vpp.Out.Height); + } + set_vpp_param(CfgParam::vpp_out_crop_x_name(), mfxVPPParams.vpp.Out.CropX, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_crop_y_name(), 
mfxVPPParams.vpp.Out.CropY, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_crop_w_name(), mfxVPPParams.vpp.Out.CropW, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_crop_h_name(), mfxVPPParams.vpp.Out.CropH, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_pic_struct_name(), mfxVPPParams.vpp.Out.PicStruct, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_framerate_n_name(), mfxVPPParams.vpp.Out.FrameRateExtN, + cfg_vpp_params, mfx_session); + set_vpp_param(CfgParam::vpp_out_framerate_d_name(), mfxVPPParams.vpp.Out.FrameRateExtD, + cfg_vpp_params, mfx_session); + + VPLLegacyTranscodeEngine::validate_vpp_param(mfxVPPParams); + + if (mfxDecParams.IOPattern == MFX_IOPATTERN_OUT_VIDEO_MEMORY) { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; + } else { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; + } + GAPI_LOG_INFO(nullptr, "Starting VPP initialization"); + + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + mfxStatus sts = MFXVideoVPP_QueryIOSurf(mfx_session, &mfxVPPParams, vppRequests); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot execute MFXVideoVPP_QueryIOSurf"); + throw std::runtime_error("Cannot execute MFXVideoVPP_QueryIOSurf, error: " + + mfxstatus_to_string(sts)); + } + + // NB: override NumFrameSuggested preallocation size (how many frames we can hold) + // if you see bunch of WARNING about "cannot get free surface from pool" + // and have abundant RAM size then increase `CfgParam::vpp_frames_pool_size_name()` + // to keep more free surfaces in a round. Otherwise VPL decode pipeline will be waiting + // till application is freeing unusable surface on its side. 
+ cv::optional preallocated_frames_count_cfg; + extract_optional_param_by_name(CfgParam::vpp_frames_pool_size_name(), + cfg_params, + preallocated_frames_count_cfg); + if (preallocated_frames_count_cfg.has_value()) { + GAPI_LOG_INFO(nullptr, "Try to use CfgParam \"" << CfgParam::vpp_frames_pool_size_name() << "\": " << + preallocated_frames_count_cfg.value() << ", for session: " << mfx_session); + try_modify_pool_size_request_param(CfgParam::vpp_frames_pool_size_name(), + preallocated_frames_count_cfg.value(), + vppRequests[1]); + + } + + // NB: Assing ID as upper limit descendant to distinguish specific VPP allocation + // from decode allocations witch started from 0: by local module convention + vppRequests[1].AllocId = std::numeric_limits::max(); + + vppRequests[1].Type |= MFX_MEMTYPE_FROM_VPPIN; + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key = + acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out); + + sts = MFXVideoVPP_Init(mfx_session, &mfxVPPParams); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot Init VPP"); + throw std::runtime_error("Cannot init VPP, error: " + + mfxstatus_to_string(sts)); + } + + // create engine session + TranscoderParams transcoder_param {mfxVPPParams}; + std::shared_ptr sess_ptr = + register_session(mfx_session, + std::move(decode_params.decoder_params), + std::move(transcoder_param), + provider); + + sess_ptr->init_surface_pool(decode_params.decode_pool_key); + sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); + + // prepare working surfaces + sess_ptr->swap_surface(*this); + sess_ptr->swap_transcode_surface(*this); + return sess_ptr; +} + +void VPLLegacyTranscodeEngine::validate_vpp_param(const mfxVideoParam& mfxVPPParams) { + GAPI_LOG_INFO(nullptr, "Starting VPP param validation"); + if (mfxVPPParams.vpp.In.Width < mfxVPPParams.vpp.In.CropW + mfxVPPParams.vpp.In.CropX) { + GAPI_LOG_WARNING(nullptr, "Invalid vonfiguration params: sum \"" << + CfgParam::vpp_in_crop_w_name() << + 
"\": " << mfxVPPParams.vpp.In.CropW << " and \"" << + CfgParam::vpp_in_crop_x_name() << + "\": " << mfxVPPParams.vpp.In.CropX << + " must be less or equal to \"" << + CfgParam::vpp_in_width_name() << "\": " << + mfxVPPParams.vpp.In.Width); + GAPI_Assert(false && "Invalid VPP params combination: Width & Crop"); + } + + if (mfxVPPParams.vpp.In.Height < mfxVPPParams.vpp.In.CropH + mfxVPPParams.vpp.In.CropY) { + GAPI_LOG_WARNING(nullptr, "Invalid vonfiguration params: sum \"" << + CfgParam::vpp_in_crop_h_name() << + "\": " << mfxVPPParams.vpp.In.CropH << " and \"" << + CfgParam::vpp_in_crop_y_name() << + "\": " << mfxVPPParams.vpp.In.CropY << + " must be less or equal to \"" << + CfgParam::vpp_in_height_name() << "\": " << + mfxVPPParams.vpp.In.Height); + GAPI_Assert(false && "Invalid VPP params combination: Height & Crop"); + } + + if (mfxVPPParams.vpp.Out.Width < mfxVPPParams.vpp.Out.CropW + mfxVPPParams.vpp.Out.CropX) { + GAPI_LOG_WARNING(nullptr, "Invalid vonfiguration params: sum \"" << + CfgParam::vpp_out_crop_w_name() << + "\": " << mfxVPPParams.vpp.Out.CropW << " and \"" << + CfgParam::vpp_out_crop_x_name() << + "\": " << mfxVPPParams.vpp.Out.CropX << + " must be less or equal to \"" << + CfgParam::vpp_out_width_name() << "\": " << + mfxVPPParams.vpp.Out.Width); + GAPI_Assert(false && "Invalid VPP params combination: Width & Crop"); + } + + if (mfxVPPParams.vpp.Out.Height < mfxVPPParams.vpp.Out.CropH + mfxVPPParams.vpp.Out.CropY) { + GAPI_LOG_WARNING(nullptr, "Invalid vonfiguration params: sum \"" << + CfgParam::vpp_out_crop_h_name() << + "\": " << mfxVPPParams.vpp.Out.CropH << " and \"" << + CfgParam::vpp_out_crop_y_name() << + "\": " << mfxVPPParams.vpp.Out.CropY << + " must be less or equal to \"" << + CfgParam::vpp_out_height_name() << "\": " << + mfxVPPParams.vpp.Out.Height); + GAPI_Assert(false && "Invalid VPP params combination: Height & Crop"); + } + + GAPI_LOG_INFO(nullptr, "Finished VPP param validation"); +} + +ProcessingEngineBase::ExecutionStatus 
VPLLegacyTranscodeEngine::execute_op(operation_t& op, EngineSession& sess) { + return op(sess); +} + +void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess, + mfxFrameSurface1* ready_surface) +{ + GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready"); + + // manage memory ownership rely on acceleration policy + ready_surface->Data.Locked--; // TODO -S- workaround + auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_out_pool_id, + ready_surface); + ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); + + // pop away synced out object + sess.vpp_queue.pop(); +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp new file mode 100644 index 0000000000..cf0621dd93 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp @@ -0,0 +1,47 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP +#define GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP +#include +#include + +#include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +class LegacyTranscodeSession; +struct IDataProvider; +struct VPLAccelerationPolicy; + +class GAPI_EXPORTS VPLLegacyTranscodeEngine : public VPLLegacyDecodeEngine { +public: + + VPLLegacyTranscodeEngine(std::unique_ptr&& accel); + session_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) override; + + static std::map get_vpp_params(const std::vector &cfg_params); // selects the cfg params whose name begins with "vpp." +private: + ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; + + void on_frame_ready(LegacyTranscodeSession& sess, + mfxFrameSurface1* ready_surface); // wraps a synced VPP surface into a MediaFrame and pushes it to ready_frames + void validate_vpp_param(const mfxVideoParam& mfxVPPParams); // asserts that crop offset + crop size fits the frame size for vpp.In and vpp.Out +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_TRANSCODE_ENGINE_LEGACY_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp new file mode 100644 index 0000000000..9fcabc7e10 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp @@ -0,0 +1,70 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include "streaming/onevpl/engine/transcode/transcode_session.hpp" +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/utils.hpp" + +#include "logger.hpp" +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +LegacyTranscodeSession::LegacyTranscodeSession(mfxSession sess, + DecoderParams&& decoder_param, + TranscoderParams&& transcoder_param, + std::shared_ptr provider) : + LegacyDecodeSession(sess, std::move(decoder_param), std::move(provider)), + mfx_transcoder_param(std::move(transcoder_param.param)) +{ +} + +LegacyTranscodeSession::~LegacyTranscodeSession() +{ + GAPI_LOG_INFO(nullptr, "Close Transcode for session: " << session); + MFXVideoVPP_Close(session); +} + +void LegacyTranscodeSession::init_transcode_surface_pool(VPLAccelerationPolicy::pool_key_t key) { + GAPI_Assert(key && "Init transcode pull with empty key"); + vpp_out_pool_id = key; +} + +void LegacyTranscodeSession::swap_transcode_surface(VPLLegacyTranscodeEngine& engine) { + VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); + GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); + try { + auto cand = acceleration_policy->get_free_surface(vpp_out_pool_id).lock(); + + GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" + ", old: " << (!vpp_surface_ptr.expired() + ? 
vpp_surface_ptr.lock()->get_handle() + : nullptr) << + ", new: "<< cand->get_handle()); + + vpp_surface_ptr = cand; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); + + // Delegate exception processing on caller + throw; + } +} + +const mfxFrameInfo& LegacyTranscodeSession::get_video_param() const { + return mfx_transcoder_param.vpp.Out; +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp new file mode 100644 index 0000000000..aa6f70c587 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp @@ -0,0 +1,46 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP +#define GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/engine/decode/decode_session.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +struct IDataProvider; +class Surface; +struct VPLAccelerationPolicy; + +class GAPI_EXPORTS LegacyTranscodeSession : public LegacyDecodeSession { +public: + friend class VPLLegacyTranscodeEngine; + + LegacyTranscodeSession(mfxSession sess, DecoderParams&& decoder_param, + TranscoderParams&& transcoder_param, + std::shared_ptr provider); + ~LegacyTranscodeSession(); + + void init_transcode_surface_pool(VPLAccelerationPolicy::pool_key_t key); + void swap_transcode_surface(VPLLegacyTranscodeEngine& engine); + const mfxFrameInfo& get_video_param() const override; // returns mfx_transcoder_param.vpp.Out +private: + mfxVideoParam mfx_transcoder_param; // VPP parameters taken from the TranscoderParams passed to the constructor + + VPLAccelerationPolicy::pool_key_t vpp_out_pool_id; // assigned by init_transcode_surface_pool() + std::weak_ptr vpp_surface_ptr; // VPP surface replaced by swap_transcode_surface() + std::queue vpp_queue; // VPP operations submitted by the engine and not yet synced +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_ENGINE_TRANSCODE_SESSION_HPP diff --git a/modules/gapi/src/streaming/onevpl/file_data_provider.hpp b/modules/gapi/src/streaming/onevpl/file_data_provider.hpp index cfa1245916..10171999a0 100644 --- a/modules/gapi/src/streaming/onevpl/file_data_provider.hpp +++ b/modules/gapi/src/streaming/onevpl/file_data_provider.hpp @@ -18,7 +18,7 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { -struct FileDataProvider : public IDataProvider { +struct GAPI_EXPORTS FileDataProvider : public IDataProvider { using file_ptr = std::unique_ptr; FileDataProvider(const std::string& file_path, diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp index fd2a401957..d1ff15b06d 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.cpp 
+++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp @@ -8,6 +8,7 @@ #include #include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp" +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" #include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" #include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" #include "streaming/onevpl/utils.hpp" @@ -106,6 +107,20 @@ GSource::Priv::Priv(std::shared_ptr provider, GAPI_Assert(false && "MFXSetConfigFilterProperty failed"); } + mfx_param.Type = MFX_VARIANT_TYPE_U32; + mfx_param.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + sts = MFXSetConfigFilterProperty(cfg_inst, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + mfx_param); + + if (sts != MFX_ERR_NONE ) + { + GAPI_LOG_WARNING(nullptr, "MFXSetConfigFilterProperty failed, error: " << + mfxstatus_to_string(sts) << + " - for \"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC\""); + GAPI_Assert(false && "MFXSetConfigFilterProperty failed"); + } + ++cfg_param_it; } @@ -204,7 +219,12 @@ GSource::Priv::Priv(std::shared_ptr provider, "GSource mfx_impl_description->ApiVersion.Major >= VPL_NEW_API_MAJOR_VERSION" " - is not implemented"); } else { - engine.reset(new VPLLegacyDecodeEngine(std::move(acceleration))); + const auto& transcode_params = VPLLegacyTranscodeEngine::get_vpp_params(preferred_params); + if (!transcode_params.empty()) { + engine.reset(new VPLLegacyTranscodeEngine(std::move(acceleration))); + } else { + engine.reset(new VPLLegacyDecodeEngine(std::move(acceleration))); + } } } @@ -212,13 +232,13 @@ GSource::Priv::Priv(std::shared_ptr provider, auto engine_session_ptr = engine->initialize_session(mfx_session, cfg_params, provider); - const mfxVideoParam& video_param = engine_session_ptr->get_video_param(); + const mfxFrameInfo& video_param = engine_session_ptr->get_video_param(); // set valid description description.size = cv::Size { - video_param.mfx.FrameInfo.Width, - video_param.mfx.FrameInfo.Height}; 
- switch(video_param.mfx.FrameInfo.FourCC) { + video_param.Width, + video_param.Height}; + switch(video_param.FourCC) { case MFX_FOURCC_I420: throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame doesn't support I420 type"); case MFX_FOURCC_NV12: @@ -226,7 +246,7 @@ GSource::Priv::Priv(std::shared_ptr provider, break; default: throw std::runtime_error("Cannot parse GMetaArg description: MediaFrame unknown 'fmt' type: " + - std::to_string(video_param.mfx.FrameInfo.FourCC)); + std::to_string(video_param.FourCC)); } description_is_valid = true; diff --git a/modules/gapi/src/streaming/onevpl/utils.hpp b/modules/gapi/src/streaming/onevpl/utils.hpp index 36711bf9a0..76a66a63f4 100644 --- a/modules/gapi/src/streaming/onevpl/utils.hpp +++ b/modules/gapi/src/streaming/onevpl/utils.hpp @@ -73,8 +73,8 @@ const char* mfx_codec_type_to_cstr(const mfxU32 fourcc, const mfxU32 type); mfxU32 cstr_to_mfx_version(const char* cstr); -std::string mfxstatus_to_string(int64_t err); -std::string mfxstatus_to_string(mfxStatus err); +std::string GAPI_EXPORTS mfxstatus_to_string(int64_t err); +std::string GAPI_EXPORTS mfxstatus_to_string(mfxStatus err); std::ostream& operator<< (std::ostream& out, const mfxImplDescription& idesc); diff --git a/modules/gapi/test/common/gapi_core_tests_inl.hpp b/modules/gapi/test/common/gapi_core_tests_inl.hpp index 89261a6490..11b6e066a6 100644 --- a/modules/gapi/test/common/gapi_core_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_core_tests_inl.hpp @@ -1701,6 +1701,25 @@ namespace { }; }; +namespace { + class TestMediaGray final : public cv::MediaFrame::IAdapter { + cv::Mat m_mat; + + public: + explicit TestMediaGray(cv::Mat m) + : m_mat(m) { + } + cv::GFrameDesc meta() const override { + return cv::GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) }; + } + cv::MediaFrame::View access(cv::MediaFrame::Access) override { + cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; + 
cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u }; + return cv::MediaFrame::View(std::move(pp), std::move(ss)); + } + }; +}; + TEST_P(SizeMFTest, ParseTest) { cv::Size out_sz; @@ -1715,6 +1734,20 @@ TEST_P(SizeMFTest, ParseTest) EXPECT_EQ(sz, out_sz); } +TEST_P(SizeMFTest, ParseGrayTest) +{ + cv::Size out_sz; + cv::Mat gray = cv::Mat::eye(sz.height, sz.width, CV_8UC1); + cv::MediaFrame frame = cv::MediaFrame::Create(gray); + + cv::GFrame in; + auto out = cv::gapi::streaming::size(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + c.apply(cv::gin(frame), cv::gout(out_sz), getCompileArgs()); + + EXPECT_EQ(sz, out_sz); +} + } // opencv_test #endif //OPENCV_GAPI_CORE_TESTS_INL_HPP diff --git a/modules/gapi/test/gapi_frame_tests.cpp b/modules/gapi/test/gapi_frame_tests.cpp index 5911ef9d9a..76038b5168 100644 --- a/modules/gapi/test/gapi_frame_tests.cpp +++ b/modules/gapi/test/gapi_frame_tests.cpp @@ -29,6 +29,23 @@ GAPI_OCV_KERNEL(OCVBlurFrame, GBlurFrame) { } }; +G_API_OP(GBlurFrameGray, , "test.blur_frame_gray") { + static GMatDesc outMeta(GFrameDesc in) { + return cv::GMatDesc(CV_8U, 1, in.size); + } +}; + +GAPI_OCV_KERNEL(OCVBlurFrameGray, GBlurFrameGray) { + static void run(const cv::MediaFrame & in, cv::Mat & out) { + GAPI_Assert(in.desc().fmt == cv::MediaFormat::GRAY); + cv::MediaFrame::View view = in.access(cv::MediaFrame::Access::R); + cv::blur(cv::Mat(in.desc().size, CV_8UC1, view.ptr[0], view.stride[0]), + out, + cv::Size{ 3,3 }); + } +}; + + //////////////////////////////////////////////////////////////////////////////// // cv::MediaFrame tests namespace { @@ -70,6 +87,26 @@ public: return cv::MediaFrame::View(std::move(pp), std::move(ss)); } }; + +class TestMediaGray final : public cv::MediaFrame::IAdapter { + cv::Mat m_mat; + using Cb = cv::MediaFrame::View::Callback; + Cb m_cb; + +public: + explicit TestMediaGray(cv::Mat m, Cb cb = []() {}) + : m_mat(m), m_cb(cb) { + } + cv::GFrameDesc meta() const override { + return cv::GFrameDesc{ 
cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) }; + } + cv::MediaFrame::View access(cv::MediaFrame::Access) override { + cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; + cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u }; + return cv::MediaFrame::View(std::move(pp), std::move(ss), Cb{ m_cb }); + } +}; + } // anonymous namespace struct MediaFrame_Test: public ::testing::Test { @@ -120,6 +157,49 @@ TEST_F(MediaFrame_BGR, Input) { EXPECT_EQ(0, cvtest::norm(out_mat_ocv, out_mat_gapi, NORM_INF)); } +struct MediaFrame_Gray : public MediaFrame_Test { + M gray; + MediaFrame_Gray() + : gray(M::eye(240, 320, CV_8UC1)) { + cv::randn(gray, cv::Scalar::all(127.0f), cv::Scalar::all(40.f)); + frame = MF::Create(gray); + } +}; + +TEST_F(MediaFrame_Gray, Meta) { + auto meta = frame.desc(); + EXPECT_EQ(cv::MediaFormat::GRAY, meta.fmt); + EXPECT_EQ(cv::Size(320, 240), meta.size); +} + +TEST_F(MediaFrame_Gray, Access) { + cv::MediaFrame::View view1 = frame.access(cv::MediaFrame::Access::R); + EXPECT_EQ(gray.ptr(), view1.ptr[0]); + EXPECT_EQ(gray.step, view1.stride[0]); + + cv::MediaFrame::View view2 = frame.access(cv::MediaFrame::Access::R); + EXPECT_EQ(gray.ptr(), view2.ptr[0]); + EXPECT_EQ(gray.step, view2.stride[0]); +} + +TEST_F(MediaFrame_Gray, Input) { + // Run the OpenCV code + cv::Mat out_mat_ocv, out_mat_gapi; + cv::blur(gray, out_mat_ocv, cv::Size{ 3,3 }); + + // Run the G-API code + cv::GFrame in; + cv::GMat out = GBlurFrameGray::on(in); + cv::GComputation(cv::GIn(in), cv::GOut(out)) + .apply(cv::gin(frame), + cv::gout(out_mat_gapi), + cv::compile_args(cv::gapi::kernels())); + + // Compare + EXPECT_EQ(0, cvtest::norm(out_mat_ocv, out_mat_gapi, NORM_INF)); +} + + struct MediaFrame_NV12: public MediaFrame_Test { cv::Size sz; cv::Mat buf, y, uv; diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 69ed80054c..8dc23a3880 100644 --- 
a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -29,6 +29,10 @@ #elif defined(_MSC_VER) #pragma warning(push) #pragma warning(disable : 4100) +# if _MSC_VER < 1910 +# pragma warning(disable:4268) // Disable warnings of ngraph. OpenVINO recommends to use MSVS 2019. +# pragma warning(disable:4800) +# endif #elif defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" diff --git a/modules/gapi/test/oak/gapi_tests_oak.cpp b/modules/gapi/test/oak/gapi_tests_oak.cpp new file mode 100644 index 0000000000..c153333374 --- /dev/null +++ b/modules/gapi/test/oak/gapi_tests_oak.cpp @@ -0,0 +1,26 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include "../test_precomp.hpp" + +#ifdef HAVE_OAK + +#include + +namespace opencv_test +{ + +// FIXME: consider a better solution +TEST(OAK, Available) +{ + cv::GFrame in; + auto out = cv::gapi::oak::encode(in, {}); + auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, cv::gapi::oak::kernels()); + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args)); +} +} // opencv_test + +#endif // HAVE_OAK diff --git a/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp b/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp index 0478d2dc1d..7921eb71c2 100644 --- a/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp +++ b/modules/gapi/test/streaming/gapi_gstreamersource_tests.cpp @@ -29,6 +29,7 @@ namespace opencv_test struct GStreamerSourceTest : public TestWithParam> { }; + TEST_P(GStreamerSourceTest, AccuracyTest) { std::string pipeline; @@ -143,6 +144,16 @@ G_TYPED_KERNEL(GGstFrameCopyToNV12, (GFrame)>, } }; +G_TYPED_KERNEL(GGstFrameCopyToGRAY8, , + 
"org.opencv.test.gstframe_copy_to_gray8") +{ + static GMatDesc outMeta(GFrameDesc desc) { + GMatDesc y{ CV_8U, 1, desc.size, false }; + return y; + } +}; + + GAPI_OCV_KERNEL(GOCVGstFrameCopyToNV12, GGstFrameCopyToNV12) { static void run(const cv::MediaFrame& in, cv::Mat& y, cv::Mat& uv) @@ -156,21 +167,50 @@ GAPI_OCV_KERNEL(GOCVGstFrameCopyToNV12, GGstFrameCopyToNV12) } }; +GAPI_OCV_KERNEL(GOCVGstFrameCopyToGRAY8, GGstFrameCopyToGRAY8) +{ + static void run(const cv::MediaFrame & in, cv::Mat & y) + { + auto view = in.access(cv::MediaFrame::Access::R); + cv::Mat ly(y.size(), y.type(), view.ptr[0], view.stride[0]); + ly.copyTo(y); + } +}; + + TEST_P(GStreamerSourceTest, GFrameTest) { std::string pipeline; cv::Size expectedFrameSize; std::size_t streamLength { }; + bool isNV12 = false; std::tie(pipeline, expectedFrameSize, streamLength) = GetParam(); + //Check if pipline string contains NV12 sub-string + if (pipeline.find("NV12") != std::string::npos) { + isNV12 = true; + } + // Graph declaration: cv::GFrame in; cv::GMat copiedY, copiedUV; - std::tie(copiedY, copiedUV) = GGstFrameCopyToNV12::on(in); - cv::GComputation c(cv::GIn(in), cv::GOut(copiedY, copiedUV)); + if (isNV12) { + std::tie(copiedY, copiedUV) = GGstFrameCopyToNV12::on(in); + } + else { + copiedY = GGstFrameCopyToGRAY8::on(in); + } + + cv::GComputation c(cv::GIn(in), isNV12 ? cv::GOut(copiedY, copiedUV) : cv::GOut(copiedY)); // Graph compilation for streaming mode: - auto ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels())); + cv::GStreamingCompiled ccomp; + if (isNV12) { + ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels())); + } else { + ccomp = c.compileStreaming(cv::compile_args(cv::gapi::kernels())); + } + EXPECT_TRUE(ccomp); EXPECT_FALSE(ccomp.running()); @@ -186,29 +226,41 @@ TEST_P(GStreamerSourceTest, GFrameTest) // Streaming - pulling of frames until the end: cv::Mat y_mat, uv_mat; - EXPECT_TRUE(ccomp.pull(cv::gout(y_mat, uv_mat))); + EXPECT_TRUE(isNV12 ? 
ccomp.pull(cv::gout(y_mat, uv_mat)) : ccomp.pull(cv::gout(y_mat))); EXPECT_TRUE(!y_mat.empty()); - EXPECT_TRUE(!uv_mat.empty()); + if (isNV12) { + EXPECT_TRUE(!uv_mat.empty()); + } cv::Size expectedYSize = expectedFrameSize; cv::Size expectedUVSize = expectedFrameSize / 2; EXPECT_EQ(expectedYSize, y_mat.size()); - EXPECT_EQ(expectedUVSize, uv_mat.size()); + if (isNV12) { + EXPECT_EQ(expectedUVSize, uv_mat.size()); + } EXPECT_EQ(CV_8UC1, y_mat.type()); - EXPECT_EQ(CV_8UC2, uv_mat.type()); + if (isNV12) { + EXPECT_EQ(CV_8UC2, uv_mat.type()); + } std::size_t framesCount = 1UL; - while (ccomp.pull(cv::gout(y_mat, uv_mat))) { + while (isNV12 ? ccomp.pull(cv::gout(y_mat, uv_mat)) : ccomp.pull(cv::gout(y_mat))) { EXPECT_TRUE(!y_mat.empty()); - EXPECT_TRUE(!uv_mat.empty()); + if (isNV12) { + EXPECT_TRUE(!uv_mat.empty()); + } EXPECT_EQ(expectedYSize, y_mat.size()); - EXPECT_EQ(expectedUVSize, uv_mat.size()); + if (isNV12) { + EXPECT_EQ(expectedUVSize, uv_mat.size()); + } EXPECT_EQ(CV_8UC1, y_mat.type()); - EXPECT_EQ(CV_8UC2, uv_mat.type()); + if (isNV12) { + EXPECT_EQ(CV_8UC2, uv_mat.type()); + } framesCount++; } @@ -221,36 +273,56 @@ TEST_P(GStreamerSourceTest, GFrameTest) EXPECT_EQ(streamLength, framesCount); } + // FIXME: Need to launch with sudo. May be infrastructure problems. // TODO: It is needed to add tests for streaming from native KMB camera: kmbcamsrc // GStreamer element. INSTANTIATE_TEST_CASE_P(CameraEmulatingPipeline, GStreamerSourceTest, Combine(Values("videotestsrc is-live=true pattern=colors num-buffers=10 ! " "videorate ! videoscale ! " - "video/x-raw,width=1920,height=1080,framerate=3/1 ! " + "video/x-raw,format=NV12,width=1920,height=1080,framerate=3/1 ! " + "appsink", + "videotestsrc is-live=true pattern=colors num-buffers=10 ! " + "videorate ! videoscale ! " + "video/x-raw,format=GRAY8,width=1920,height=1080,framerate=3/1 ! 
" "appsink"), Values(cv::Size(1920, 1080)), Values(10UL))); + INSTANTIATE_TEST_CASE_P(FileEmulatingPipeline, GStreamerSourceTest, Combine(Values("videotestsrc pattern=colors num-buffers=10 ! " "videorate ! videoscale ! " - "video/x-raw,width=640,height=420,framerate=3/1 ! " + "video/x-raw,format=NV12,width=640,height=420,framerate=3/1 ! " + "appsink", + "videotestsrc pattern=colors num-buffers=10 ! " + "videorate ! videoscale ! " + "video/x-raw,format=GRAY8,width=640,height=420,framerate=3/1 ! " "appsink"), Values(cv::Size(640, 420)), Values(10UL))); + INSTANTIATE_TEST_CASE_P(MultipleLiveSources, GStreamerSourceTest, Combine(Values("videotestsrc is-live=true pattern=colors num-buffers=10 ! " - "videoscale ! video/x-raw,width=1280,height=720 ! appsink " + "videoscale ! video/x-raw,format=NV12,width=1280,height=720 ! appsink " + "videotestsrc is-live=true pattern=colors num-buffers=10 ! " + "fakesink", + "videotestsrc is-live=true pattern=colors num-buffers=10 ! " + "videoscale ! video/x-raw,format=GRAY8,width=1280,height=720 ! appsink " "videotestsrc is-live=true pattern=colors num-buffers=10 ! " "fakesink"), Values(cv::Size(1280, 720)), Values(10UL))); + INSTANTIATE_TEST_CASE_P(MultipleNotLiveSources, GStreamerSourceTest, Combine(Values("videotestsrc pattern=colors num-buffers=10 ! " - "videoscale ! video/x-raw,width=1280,height=720 ! appsink " + "videoscale ! video/x-raw,format=NV12,width=1280,height=720 ! appsink " + "videotestsrc pattern=colors num-buffers=10 ! " + "fakesink", + "videotestsrc pattern=colors num-buffers=10 ! " + "videoscale ! video/x-raw,format=GRAY8,width=1280,height=720 ! appsink " "videotestsrc pattern=colors num-buffers=10 ! 
" "fakesink"), Values(cv::Size(1280, 720)), @@ -308,11 +380,11 @@ TEST(GStreamerMultiSourceSmokeTest, Test) EXPECT_FALSE(ccomp.running()); } -struct GStreamerMultiSourceTest : +struct GStreamerMultiSourceTestNV12 : public TestWithParam> { }; -TEST_P(GStreamerMultiSourceTest, ImageDataTest) +TEST_P(GStreamerMultiSourceTestNV12, ImageDataTest) { std::string pathToLeftIm = findDataFile("cv/stereomatching/datasets/tsukuba/im6.png"); std::string pathToRightIm = findDataFile("cv/stereomatching/datasets/tsukuba/im2.png"); @@ -377,7 +449,7 @@ TEST_P(GStreamerMultiSourceTest, ImageDataTest) EXPECT_FALSE(compiled.running()); } -INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTest, +INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTestNV12, Combine(Values(cv::GComputation([]() { cv::GMat in1, in2; @@ -387,7 +459,7 @@ INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTe })), Values(cv::gapi::wip::GStreamerSource::OutputType::MAT))); -INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTest, +INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTestNV12, Combine(Values(cv::GComputation([]() { cv::GFrame in1, in2; @@ -396,6 +468,96 @@ INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSource cv::gapi::streaming::BGR(in2))); })), Values(cv::gapi::wip::GStreamerSource::OutputType::FRAME))); + +struct GStreamerMultiSourceTestGRAY8 : + public TestWithParam> +{ }; + +TEST_P(GStreamerMultiSourceTestGRAY8, ImageDataTest) +{ + std::string pathToLeftIm = findDataFile("cv/stereomatching/datasets/tsukuba/im6.png"); + std::string pathToRightIm = findDataFile("cv/stereomatching/datasets/tsukuba/im2.png"); + + std::string pipelineToReadImage("filesrc location=LOC ! pngdec ! videoconvert ! " + "videoscale ! video/x-raw,format=GRAY8 ! 
appsink"); + + cv::gapi::wip::GStreamerSource leftImageProvider( + std::regex_replace(pipelineToReadImage, std::regex("LOC"), pathToLeftIm)); + cv::gapi::wip::GStreamerSource rightImageProvider( + std::regex_replace(pipelineToReadImage, std::regex("LOC"), pathToRightIm)); + + cv::gapi::wip::Data leftImData, rightImData; + leftImageProvider.pull(leftImData); + rightImageProvider.pull(rightImData); + + cv::Mat leftRefMat = cv::util::get(leftImData); + cv::Mat rightRefMat = cv::util::get(rightImData); + + // Retrieve test parameters: + std::tuple params = GetParam(); + cv::GComputation extractImage = std::move(std::get<0>(params)); + cv::gapi::wip::GStreamerSource::OutputType outputType = std::get<1>(params); + + // Graph compilation for streaming mode: + auto compiled = + extractImage.compileStreaming(); + + EXPECT_TRUE(compiled); + EXPECT_FALSE(compiled.running()); + + cv::gapi::wip::GStreamerPipeline + pipeline(std::string("multifilesrc location=" + pathToLeftIm + " index=0 loop=true ! " + "pngdec ! videoconvert ! videoscale ! video/x-raw,format=GRAY8 ! " + "appsink name=sink1 ") + + std::string("multifilesrc location=" + pathToRightIm + " index=0 loop=true ! " + "pngdec ! videoconvert ! videoscale ! video/x-raw,format=GRAY8 ! 
" + "appsink name=sink2")); + + // GStreamer streaming sources configuration: + auto src1 = pipeline.getStreamingSource("sink1", outputType); + auto src2 = pipeline.getStreamingSource("sink2", outputType); + + compiled.setSource(cv::gin(src1, src2)); + + // Start of streaming: + compiled.start(); + EXPECT_TRUE(compiled.running()); + + // Streaming - pulling of frames: + cv::Mat in_mat1, in_mat2; + + std::size_t counter { }, limit { 10 }; + while(compiled.pull(cv::gout(in_mat1, in_mat2)) && (counter < limit)) { + EXPECT_EQ(0, cv::norm(in_mat1, leftRefMat, cv::NORM_INF)); + EXPECT_EQ(0, cv::norm(in_mat2, rightRefMat, cv::NORM_INF)); + ++counter; + } + + compiled.stop(); + + EXPECT_FALSE(compiled.running()); +} + +INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGMatsTest, GStreamerMultiSourceTestGRAY8, + Combine(Values(cv::GComputation([]() + { + cv::GMat in1, in2; + return cv::GComputation(cv::GIn(in1, in2), + cv::GOut(cv::gapi::copy(in1), + cv::gapi::copy(in2))); + })), + Values(cv::gapi::wip::GStreamerSource::OutputType::MAT))); + +INSTANTIATE_TEST_CASE_P(GStreamerMultiSourceViaGFramesTest, GStreamerMultiSourceTestGRAY8, + Combine(Values(cv::GComputation([]() + { + cv::GFrame in1, in2; + return cv::GComputation(cv::GIn(in1, in2), + cv::GOut(cv::gapi::streaming::BGR(in1), + cv::gapi::streaming::BGR(in2))); + })), + Values(cv::gapi::wip::GStreamerSource::OutputType::FRAME))); + } // namespace opencv_test #endif // HAVE_GSTREAMER diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index 3f876fc61b..4d33d4b0c5 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -164,6 +164,26 @@ public: } }; +class TestMediaGRAY final : public cv::MediaFrame::IAdapter { + cv::Mat m_mat; + using Cb = cv::MediaFrame::View::Callback; + Cb m_cb; + +public: + explicit TestMediaGRAY(cv::Mat m, Cb cb = []() {}) + : m_mat(m), m_cb(cb) { + } + 
cv::GFrameDesc meta() const override { + return cv::GFrameDesc{ cv::MediaFormat::GRAY, cv::Size(m_mat.cols, m_mat.rows) }; + } + cv::MediaFrame::View access(cv::MediaFrame::Access) override { + cv::MediaFrame::View::Ptrs pp = { m_mat.ptr(), nullptr, nullptr, nullptr }; + cv::MediaFrame::View::Strides ss = { m_mat.step, 0u, 0u, 0u }; + return cv::MediaFrame::View(std::move(pp), std::move(ss), Cb{ m_cb }); + } +}; + + class BGRSource : public cv::gapi::wip::GCaptureSource { public: explicit BGRSource(const std::string& pipeline) @@ -230,6 +250,31 @@ public: } }; +class GRAYSource : public cv::gapi::wip::GCaptureSource { +public: + explicit GRAYSource(const std::string& pipeline) + : cv::gapi::wip::GCaptureSource(pipeline) { + } + + bool pull(cv::gapi::wip::Data& data) { + if (cv::gapi::wip::GCaptureSource::pull(data)) { + cv::Mat bgr = cv::util::get(data); + cv::Mat gray; + cvtColor(bgr, gray, cv::COLOR_BGR2GRAY); + data = cv::MediaFrame::Create(gray); + return true; + } + return false; + } + + GMetaArg descr_of() const override { + return cv::GMetaArg{ cv::GFrameDesc{cv::MediaFormat::GRAY, + cv::util::get( + cv::gapi::wip::GCaptureSource::descr_of()).size} }; + } +}; + + void checkPullOverload(const cv::Mat& ref, const bool has_output, cv::util::variant& args) { @@ -1789,6 +1834,46 @@ TEST(GAPI_Streaming, CopyFrame) } } +TEST(GAPI_Streaming, CopyFrameGray) +{ + std::string filepath = findDataFile("cv/video/768x576.avi"); + + cv::GFrame in; + auto out = cv::gapi::copy(in); + + cv::GComputation comp(cv::GIn(in), cv::GOut(out)); + + auto cc = comp.compileStreaming(); + try { + cc.setSource(filepath); + } + catch (...) 
{ + throw SkipTestException("Video file can not be opened"); + } + + cv::VideoCapture cap; + cap.open(filepath); + if (!cap.isOpened()) + throw SkipTestException("Video file can not be opened"); + + cv::MediaFrame frame; + cv::Mat ocv_mat; + std::size_t num_frames = 0u; + std::size_t max_frames = 10u; + + cc.start(); + while (cc.pull(cv::gout(frame)) && num_frames < max_frames) + { + auto view = frame.access(cv::MediaFrame::Access::R); + cv::Mat gapi_mat(frame.desc().size, CV_8UC1, view.ptr[0]); + num_frames++; + cap >> ocv_mat; + cv::Mat gray; + cvtColor(ocv_mat, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF)); + } +} + TEST(GAPI_Streaming, CopyMat) { std::string filepath = findDataFile("cv/video/768x576.avi"); @@ -1892,23 +1977,97 @@ TEST(GAPI_Streaming, Reshape) } } +TEST(GAPI_Streaming, ReshapeGray) +{ + std::string filepath = findDataFile("cv/video/768x576.avi"); + + cv::GFrame in; + auto out = cv::gapi::copy(in); + + cv::GComputation comp(cv::GIn(in), cv::GOut(out)); + + auto cc = comp.compileStreaming(); + try { + cc.setSource(filepath); + } + catch (...) { + throw SkipTestException("Video file can not be opened"); + } + + cv::VideoCapture cap; + cap.open(filepath); + if (!cap.isOpened()) + throw SkipTestException("Video file can not be opened"); + + cv::MediaFrame frame; + cv::Mat ocv_mat; + std::size_t num_frames = 0u; + std::size_t max_frames = 10u; + + cc.start(); + while (cc.pull(cv::gout(frame)) && num_frames < max_frames) + { + auto view = frame.access(cv::MediaFrame::Access::R); + cv::Mat gapi_mat(frame.desc().size, CV_8UC1, view.ptr[0]); + num_frames++; + cap >> ocv_mat; + cv::Mat gray; + cvtColor(ocv_mat, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF)); + } + + // Reshape the graph meta + filepath = findDataFile("cv/video/1920x1080.avi"); + cc.stop(); + try { + cc.setSource(filepath); + } + catch (...) 
{ + throw SkipTestException("Video file can not be opened"); + } + + cap.open(filepath); + if (!cap.isOpened()) + throw SkipTestException("Video file can not be opened"); + + cv::MediaFrame frame2; + cv::Mat ocv_mat2; + + num_frames = 0u; + + cc.start(); + while (cc.pull(cv::gout(frame2)) && num_frames < max_frames) + { + auto view = frame2.access(cv::MediaFrame::Access::R); + cv::Mat gapi_mat(frame2.desc().size, CV_8UC1, view.ptr[0]); + num_frames++; + cap >> ocv_mat2; + cv::Mat gray; + cvtColor(ocv_mat2, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(0, cvtest::norm(gray, gapi_mat, NORM_INF)); + } +} + + namespace { enum class TestSourceType { BGR, - NV12 + NV12, + GRAY }; std::ostream& operator<<(std::ostream& os, TestSourceType a) { os << "Source:"; switch (a) { case TestSourceType::BGR: return os << "BGR"; case TestSourceType::NV12: return os << "NV12"; + case TestSourceType::GRAY: return os << "GRAY"; default: CV_Assert(false && "unknown TestSourceType"); } } cv::gapi::wip::IStreamSource::Ptr createTestSource(TestSourceType sourceType, const std::string& pipeline) { - assert(sourceType == TestSourceType::BGR || sourceType == TestSourceType::NV12); + assert(sourceType == TestSourceType::BGR || sourceType == TestSourceType::NV12 || sourceType == TestSourceType::GRAY); cv::gapi::wip::IStreamSource::Ptr ptr { }; @@ -1933,6 +2092,16 @@ namespace { } break; } + case TestSourceType::GRAY: { + try { + ptr = cv::gapi::wip::make_src(pipeline); + } + catch (...) { + throw SkipTestException(std::string("GRAYSource for '") + pipeline + + "' couldn't be created!"); + } + break; + } default: { throw SkipTestException("Incorrect type of source! 
" "Something went wrong in the test!"); @@ -2000,6 +2169,25 @@ namespace { cvtBGR2NV12(bgr, y, uv); return uv; } }, + { std::make_pair(TestSourceType::GRAY, TestAccessType::BGR), + [](const cv::Mat& bgr) { + cv::Mat gray; + cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY); + cv::Mat out_bgr; + cv::cvtColor(gray, out_bgr, cv::COLOR_GRAY2BGR); + return out_bgr; + } }, + { std::make_pair(TestSourceType::GRAY, TestAccessType::Y), + [](const cv::Mat& bgr) { + cv::Mat gray; + cv::cvtColor(bgr, gray, cv::COLOR_BGR2GRAY); + return gray; + } }, + { std::make_pair(TestSourceType::GRAY, TestAccessType::UV), + [](const cv::Mat& bgr) { + cv::Mat uv(bgr.size() / 2, CV_8UC2, cv::Scalar::all(127)); + return uv; + } }, }; } // anonymous namespace @@ -2007,6 +2195,7 @@ struct GAPI_Accessors_In_Streaming : public TestWithParam< std::tuple> { }; + TEST_P(GAPI_Accessors_In_Streaming, AccuracyTest) { std::string filepath{}; @@ -2050,10 +2239,11 @@ TEST_P(GAPI_Accessors_In_Streaming, AccuracyTest) INSTANTIATE_TEST_CASE_P(TestAccessor, GAPI_Accessors_In_Streaming, Combine(Values("cv/video/768x576.avi"), - Values(TestSourceType::BGR, TestSourceType::NV12), + Values(TestSourceType::BGR, TestSourceType::NV12, TestSourceType::GRAY), Values(TestAccessType::BGR, TestAccessType::Y, TestAccessType::UV) )); + struct GAPI_Accessors_Meta_In_Streaming : public TestWithParam< std::tuple> { }; @@ -2120,7 +2310,7 @@ TEST_P(GAPI_Accessors_Meta_In_Streaming, AccuracyTest) INSTANTIATE_TEST_CASE_P(AccessorMeta, GAPI_Accessors_Meta_In_Streaming, Combine(Values("cv/video/768x576.avi"), - Values(TestSourceType::BGR, TestSourceType::NV12), + Values(TestSourceType::BGR, TestSourceType::NV12, TestSourceType::GRAY), Values(TestAccessType::BGR, TestAccessType::Y, TestAccessType::UV) )); @@ -2232,7 +2422,7 @@ TEST(GAPI_Streaming, TestDesyncRMat) { cv::optional out_desync; cv::optional out_rmat; while (true) { - // Initially it throwed "bad variant access" since there was + // Initially it threw "bad variant access" since 
there was // no RMat handling in wrap_opt_arg EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_rmat))); if (out_rmat) break; @@ -2273,11 +2463,54 @@ TEST(GAPI_Streaming, TestDesyncMediaFrame) { cv::optional out_desync; cv::optional out_frame; while (true) { - // Initially it throwed "bad variant access" since there was + // Initially it threw "bad variant access" since there was // no MediaFrame handling in wrap_opt_arg EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_frame))); if (out_frame) break; } } +G_API_OP(GTestBlurGray, , "test.blur_gray") { + static GFrameDesc outMeta(GFrameDesc d) { return d; } +}; +GAPI_OCV_KERNEL(GOcvTestBlurGray, GTestBlurGray) { + static void run(const cv::MediaFrame & in, cv::MediaFrame & out) { + auto d = in.desc(); + GAPI_Assert(d.fmt == cv::MediaFormat::GRAY); + auto view = in.access(cv::MediaFrame::Access::R); + cv::Mat mat(d.size, CV_8UC1, view.ptr[0]); + cv::Mat blurred; + cv::blur(mat, blurred, cv::Size{ 3,3 }); + out = cv::MediaFrame::Create(blurred); + } +}; + +TEST(GAPI_Streaming, TestDesyncMediaFrameGray) { + cv::GFrame in; + auto blurred = GTestBlurGray::on(in); + auto desynced = cv::gapi::streaming::desync(blurred); + auto out = GTestBlurGray::on(blurred); + auto pipe = cv::GComputation(cv::GIn(in), cv::GOut(desynced, out)) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + std::string filepath = findDataFile("cv/video/768x576.avi"); + try { + pipe.setSource(filepath); + } + catch (...) 
{ + throw SkipTestException("Video file can not be opened"); + } + pipe.start(); + + cv::optional out_desync; + cv::optional out_frame; + while (true) { + // Initially it threw "bad variant access" since there was + // no MediaFrame handling in wrap_opt_arg + EXPECT_NO_THROW(pipe.pull(cv::gout(out_desync, out_frame))); + if (out_frame) break; + } +} + + } // namespace opencv_test diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp index c62f58eecf..51fb9f276a 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp @@ -29,6 +29,7 @@ #ifdef HAVE_ONEVPL #include +#include "streaming/onevpl/file_data_provider.hpp" #include "streaming/onevpl/cfg_param_device_selector.hpp" #include "streaming/onevpl/accelerators/surface/surface.hpp" @@ -37,8 +38,15 @@ #include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" #include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" #include "streaming/onevpl/accelerators/utils/shared_lock.hpp" -#include "streaming/onevpl/engine/processing_engine_base.hpp" -#include "streaming/onevpl/engine/engine_session.hpp" +#define private public +#define protected public +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" +#include "streaming/onevpl/engine/transcode/transcode_session.hpp" +#undef protected +#undef private +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) namespace opencv_test { @@ -63,9 +71,9 @@ struct TestProcessingSession : public cv::gapi::wip::onevpl::EngineSession { EngineSession(mfx_session, {}) { } - const mfxVideoParam& get_video_param() const override { + const mfxFrameInfo& get_video_param() const override { static mfxVideoParam empty; - return empty; + return empty.mfx.FrameInfo; } }; @@ -581,7 +589,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) // Allocate surfaces for decoder 
VPLAccelerationPolicy::pool_key_t key = accel.create_surface_pool(request, - mfxDecParams); + mfxDecParams.mfx.FrameInfo); auto cand_surface = accel.get_free_surface(key).lock(); sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); @@ -594,6 +602,212 @@ TEST(OneVPL_Source_DX11_Accel, Init) MFXClose(mfx_session); MFXUnload(mfx_handle); } + +TEST(OneVPL_Source_DX11_Accel_VPL, Init) +{ + using namespace cv::gapi::wip::onevpl; + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + std::unique_ptr acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + mfxLoader mfx_handle = MFXLoad(); + + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_0); + mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = MFX_ACCEL_MODE_VIA_D3D11; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = MFX_CODEC_HEVC; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), + mfx_param_2), MFX_ERR_NONE); + + mfxConfig cfg_inst_3 = MFXCreateConfig(mfx_handle); + EXPECT_TRUE(cfg_inst_3); + mfxVariant mfx_param_3; + mfx_param_3.Type = MFX_VARIANT_TYPE_U32; + mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + 
mfx_param_3), MFX_ERR_NONE); + // create session + mfxSession mfx_session{}; + mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // assign acceleration + EXPECT_NO_THROW(acceleration_policy->init(mfx_session)); + + // create proper bitstream + std::string file_path = findDataFile("highgui/video/big_buck_bunny.h265"); + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(MFX_CODEC_HEVC)})); + IDataProvider::mfx_codec_id_type decoder_id_name = data_provider->get_mfx_codec_id(); + + // Prepare video param + mfxVideoParam mfxDecParams {}; + mfxDecParams.mfx.CodecId = decoder_id_name; + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + // try fetch & decode input data + sts = MFX_ERR_NONE; + std::shared_ptr bitstream{}; + do { + EXPECT_TRUE(data_provider->fetch_bitstream_data(bitstream)); + sts = MFXVideoDECODE_DecodeHeader(mfx_session, bitstream.get(), &mfxDecParams); + EXPECT_TRUE(MFX_ERR_NONE == sts || MFX_ERR_MORE_DATA == sts); + } while (sts == MFX_ERR_MORE_DATA && !data_provider->empty()); + + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxFrameAllocRequest request{}; + memset(&request, 0, sizeof(request)); + sts = MFXVideoDECODE_QueryIOSurf(mfx_session, &mfxDecParams, &request); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // Allocate surfaces for decoder + request.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN; + VPLAccelerationPolicy::pool_key_t decode_pool_key = acceleration_policy->create_surface_pool(request, + mfxDecParams.mfx.FrameInfo); + sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // initialize VPLL + mfxU16 vppOutImgWidth = 672; + mfxU16 vppOutImgHeight = 382; + + mfxVideoParam mfxVPPParams{0}; + mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo; + + mfxVPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; + mfxVPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + mfxVPPParams.vpp.Out.Width = 
ALIGN16(vppOutImgWidth); + mfxVPPParams.vpp.Out.Height = ALIGN16(vppOutImgHeight); + mfxVPPParams.vpp.Out.CropX = 0; + mfxVPPParams.vpp.Out.CropY = 0; + mfxVPPParams.vpp.Out.CropW = vppOutImgWidth; + mfxVPPParams.vpp.Out.CropH = vppOutImgHeight; + mfxVPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; + mfxVPPParams.vpp.Out.FrameRateExtN = 30; + mfxVPPParams.vpp.Out.FrameRateExtD = 1; + + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + EXPECT_EQ(MFXVideoVPP_QueryIOSurf(mfx_session, &mfxVPPParams, vppRequests), MFX_ERR_NONE); + + vppRequests[1].AllocId = 666; + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key = + acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out); + EXPECT_EQ(MFXVideoVPP_Init(mfx_session, &mfxVPPParams), MFX_ERR_NONE); + + // finalize session creation + DecoderParams d_param{bitstream, mfxDecParams}; + TranscoderParams t_param{mfxVPPParams}; + VPLLegacyTranscodeEngine engine(std::move(acceleration_policy)); + std::shared_ptr sess_ptr = + engine.register_session( + mfx_session, + std::move(d_param), + std::move(t_param), + data_provider); + + sess_ptr->init_surface_pool(decode_pool_key); + sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); + + // prepare working surfaces + sess_ptr->swap_surface(engine); + sess_ptr->swap_transcode_surface(engine); + + // launch pipeline + LegacyTranscodeSession & my_sess = *sess_ptr; + { + if (!my_sess.data_provider) { + my_sess.last_status = MFX_ERR_MORE_DATA; + } else { + my_sess.last_status = MFX_ERR_NONE; + if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) { + my_sess.last_status = MFX_ERR_MORE_DATA; + my_sess.data_provider.reset(); //close source + } + } + + // 2) enqueue ASYNC decode operation + // prepare sync object for new surface + LegacyTranscodeSession::op_handle_t sync_pair{}; + + // enqueue decode 
operation with current session surface + { + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength)) + ? my_sess.stream.get() + + : nullptr, /* No more data to read, start decode draining mode*/ + my_sess.procesing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + // process wait-like statuses in-place: + // It had better to use up all VPL decoding resources in pipeline + // as soon as possible. So waiting more free-surface or device free + while (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_WRN_DEVICE_BUSY) { + try { + if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { + my_sess.swap_surface(engine); + } + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.stream.get(), + my_sess.procesing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + } catch (const std::runtime_error&) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. + break; + } + } + } + // 4) transcode + { + auto *dec_surface = sync_pair.second; + if(my_sess.vpp_surface_ptr.lock()) + { + mfxFrameSurface1* out_surf = my_sess.vpp_surface_ptr.lock()->get_handle(); + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, dec_surface, + out_surf, + nullptr, &sync_pair.first); + sync_pair.second = out_surf; + + my_sess.last_status = MFXVideoCORE_SyncOperation(my_sess.session, sync_pair.first, 11000); + } + try { + my_sess.swap_transcode_surface(engine); + } catch (... 
) { + my_sess.vpp_surface_ptr.reset(); + } + } + } +} #endif // HAVE_DIRECTX #endif // HAVE_D3D11 diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index a7fdfc8b67..65d24e0ab0 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -84,6 +84,9 @@ if(HAVE_QT) list(APPEND qt_deps OpenGLWidgets) endif() list(APPEND qt_deps OpenGL) + if(OPENGL_LIBRARIES) + list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}") + endif() endif() foreach(dt_dep ${qt_deps}) @@ -93,8 +96,11 @@ if(HAVE_QT) endforeach() else() ocv_assert(QT_VERSION_MAJOR EQUAL 4) - if (HAVE_QT_OPENGL) + if(HAVE_QT_OPENGL) set(QT_USE_QTOPENGL TRUE) + if(OPENGL_LIBRARIES) + list(APPEND HIGHGUI_LIBRARIES "${OPENGL_LIBRARIES}") + endif() endif() include(${QT_USE_FILE}) @@ -157,6 +163,9 @@ if(TARGET ocv.3rdparty.win32ui) set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI") list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp) list(APPEND tgts ocv.3rdparty.win32ui) + if(HAVE_OPENGL AND OPENGL_LIBRARIES) + list(APPEND tgts "${OPENGL_LIBRARIES}") + endif() endif() endif() @@ -271,14 +280,6 @@ if(APPLE) add_apple_compiler_options(${the_module}) endif() -if(OPENCV_HIGHGUI_BUILTIN_BACKEND STREQUAL "WIN32UI" AND HAVE_OPENGL AND OPENGL_LIBRARIES) - ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}") -endif() - -if(OPENCV_HIGHGUI_BUILTIN_BACKEND MATCHES "^QT" AND HAVE_OPENGL AND OPENGL_LIBRARIES) - ocv_target_link_libraries(${the_module} PRIVATE "${OPENGL_LIBRARIES}") -endif() - if(MSVC AND NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT) set_target_properties(${the_module} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /NODEFAULTLIB:libcmt.lib /DEBUG") endif() diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 481fee9fbd..81d205a69a 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -963,6 +963,8 @@ void cv::imshow( const String& winname, InputArray 
_img ) { CV_TRACE_FUNCTION(); + const Size size = _img.size(); + CV_Assert(size.width>0 && size.height>0); { cv::AutoLock lock(cv::getWindowMutex()); cleanupClosedWindows_(); @@ -995,9 +997,7 @@ void cv::imshow( const String& winname, InputArray _img ) } } - const Size size = _img.size(); #ifndef HAVE_OPENGL - CV_Assert(size.width>0 && size.height>0); { Mat img = _img.getMat(); CvMat c_img = cvMat(img); @@ -1005,7 +1005,6 @@ void cv::imshow( const String& winname, InputArray _img ) } #else const double useGl = getWindowProperty(winname, WND_PROP_OPENGL); - CV_Assert(size.width>0 && size.height>0); if (useGl <= 0) { diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index f6ba44b425..d8f2271faa 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -529,6 +529,9 @@ static int icvInitSystem(int* c, char** v) //"For any GUI application using Qt, there is precisely one QApplication object" if (!QApplication::instance()) { +#if QT_VERSION >= QT_VERSION_CHECK(5, 6, 0) + QCoreApplication::setAttribute(Qt::AA_EnableHighDpiScaling, true); +#endif new QApplication(*c, v); setlocale(LC_NUMERIC,"C"); diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index d7ff9a178d..148eea71e7 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -98,17 +98,17 @@ enum ImwriteFlags { IMWRITE_EXR_COMPRESSION = (3 << 4) + 1, /* 49 */ //!< override EXR compression type (ZIP_COMPRESSION = 3 is default) IMWRITE_WEBP_QUALITY = 64, //!< For WEBP, it can be a quality from 1 to 100 (the higher is the better). By default (without any parameter) and for quality above 100 the lossless compression is used. 
IMWRITE_PAM_TUPLETYPE = 128,//!< For PAM, sets the TUPLETYPE field to the corresponding string value that is defined for the format - IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values - IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI - IMWRITE_TIFF_YDPI = 258, //!< For TIFF, use to specify the Y direction DPI - IMWRITE_TIFF_COMPRESSION = 259, //!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default. + IMWRITE_TIFF_RESUNIT = 256,//!< For TIFF, use to specify which DPI resolution unit to set; see libtiff documentation for valid values + IMWRITE_TIFF_XDPI = 257,//!< For TIFF, use to specify the X direction DPI + IMWRITE_TIFF_YDPI = 258,//!< For TIFF, use to specify the Y direction DPI + IMWRITE_TIFF_COMPRESSION = 259,//!< For TIFF, use to specify the image compression scheme. See libtiff for integer constants corresponding to compression formats. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default. IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000. 
}; enum ImwriteEXRTypeFlags { /*IMWRITE_EXR_TYPE_UNIT = 0, //!< not supported */ - IMWRITE_EXR_TYPE_HALF = 1, //!< store as HALF (FP16) - IMWRITE_EXR_TYPE_FLOAT = 2 //!< store as FP32 (default) + IMWRITE_EXR_TYPE_HALF = 1, //!< store as HALF (FP16) + IMWRITE_EXR_TYPE_FLOAT = 2 //!< store as FP32 (default) }; enum ImwriteEXRCompressionFlags { @@ -140,14 +140,14 @@ enum ImwritePNGFlags { IMWRITE_PNG_STRATEGY_FIXED = 4 //!< Using this value prevents the use of dynamic Huffman codes, allowing for a simpler decoder for special applications. }; -//! Imwrite PAM specific tupletype flags used to define the 'TUPETYPE' field of a PAM file. +//! Imwrite PAM specific tupletype flags used to define the 'TUPLETYPE' field of a PAM file. enum ImwritePAMFlags { - IMWRITE_PAM_FORMAT_NULL = 0, - IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1, - IMWRITE_PAM_FORMAT_GRAYSCALE = 2, + IMWRITE_PAM_FORMAT_NULL = 0, + IMWRITE_PAM_FORMAT_BLACKANDWHITE = 1, + IMWRITE_PAM_FORMAT_GRAYSCALE = 2, IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA = 3, - IMWRITE_PAM_FORMAT_RGB = 4, - IMWRITE_PAM_FORMAT_RGB_ALPHA = 5, + IMWRITE_PAM_FORMAT_RGB = 4, + IMWRITE_PAM_FORMAT_RGB_ALPHA = 5 }; //! @} imgcodecs_flags @@ -209,8 +209,8 @@ CV_EXPORTS_W Mat imread( const String& filename, int flags = IMREAD_COLOR ); The function imreadmulti loads a multi-page image from the specified file into a vector of Mat objects. @param filename Name of file to be loaded. +@param mats A vector of Mat objects holding each page. @param flags Flag that can take values of cv::ImreadModes, default with cv::IMREAD_ANYCOLOR. -@param mats A vector of Mat objects holding each page, if more than one. @sa cv::imread */ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& mats, int flags = IMREAD_ANYCOLOR); @@ -219,10 +219,10 @@ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& m The function imreadmulti loads a specified range from a multi-page image from the specified file into a vector of Mat objects. 
@param filename Name of file to be loaded. +@param mats A vector of Mat objects holding each page. @param start Start index of the image to load @param count Count number of images to load @param flags Flag that can take values of cv::ImreadModes, default with cv::IMREAD_ANYCOLOR. -@param mats A vector of Mat objects holding each page, if more than one. @sa cv::imread */ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& mats, int start, int count, int flags = IMREAD_ANYCOLOR); diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp index 960f5da3d3..0585035202 100644 --- a/modules/imgcodecs/src/grfmt_exr.cpp +++ b/modules/imgcodecs/src/grfmt_exr.cpp @@ -637,7 +637,7 @@ bool ExrEncoder::write( const Mat& img, const std::vector& params ) for( size_t i = 0; i < params.size(); i += 2 ) { - if( params[i] == CV_IMWRITE_EXR_TYPE ) + if( params[i] == IMWRITE_EXR_TYPE ) { switch( params[i+1] ) { diff --git a/modules/imgcodecs/src/grfmt_jpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg.cpp index 3dd9d68771..17feafc404 100644 --- a/modules/imgcodecs/src/grfmt_jpeg.cpp +++ b/modules/imgcodecs/src/grfmt_jpeg.cpp @@ -643,23 +643,23 @@ bool JpegEncoder::write( const Mat& img, const std::vector& params ) for( size_t i = 0; i < params.size(); i += 2 ) { - if( params[i] == CV_IMWRITE_JPEG_QUALITY ) + if( params[i] == IMWRITE_JPEG_QUALITY ) { quality = params[i+1]; quality = MIN(MAX(quality, 0), 100); } - if( params[i] == CV_IMWRITE_JPEG_PROGRESSIVE ) + if( params[i] == IMWRITE_JPEG_PROGRESSIVE ) { progressive = params[i+1]; } - if( params[i] == CV_IMWRITE_JPEG_OPTIMIZE ) + if( params[i] == IMWRITE_JPEG_OPTIMIZE ) { optimize = params[i+1]; } - if( params[i] == CV_IMWRITE_JPEG_LUMA_QUALITY ) + if( params[i] == IMWRITE_JPEG_LUMA_QUALITY ) { if (params[i+1] >= 0) { @@ -674,7 +674,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector& params ) } } - if( params[i] == CV_IMWRITE_JPEG_CHROMA_QUALITY ) + if( params[i] == 
IMWRITE_JPEG_CHROMA_QUALITY ) { if (params[i+1] >= 0) { @@ -682,7 +682,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector& params ) } } - if( params[i] == CV_IMWRITE_JPEG_RST_INTERVAL ) + if( params[i] == IMWRITE_JPEG_RST_INTERVAL ) { rst_interval = params[i+1]; rst_interval = MIN(MAX(rst_interval, 0), 65535L); diff --git a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp index 73d49282d7..c5b1a292cc 100644 --- a/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp +++ b/modules/imgcodecs/src/grfmt_jpeg2000_openjpeg.cpp @@ -545,7 +545,7 @@ bool Jpeg2KOpjDecoderBase::readHeader() */ bool hasAlpha = false; const int numcomps = image_->numcomps; - CV_Assert(numcomps >= 1); + CV_Check(numcomps, numcomps >= 1 && numcomps <= 4, "Unsupported number of components"); for (int i = 0; i < numcomps; i++) { const opj_image_comp_t& comp = image_->comps[i]; diff --git a/modules/imgcodecs/src/grfmt_pam.cpp b/modules/imgcodecs/src/grfmt_pam.cpp index 4db595055e..1c8f8476a5 100644 --- a/modules/imgcodecs/src/grfmt_pam.cpp +++ b/modules/imgcodecs/src/grfmt_pam.cpp @@ -111,12 +111,12 @@ static bool rgb_convert (void *src, void *target, int width, int target_channels int target_depth); const static struct pam_format formats[] = { - {CV_IMWRITE_PAM_FORMAT_NULL, "", NULL, {0, 0, 0, 0} }, - {CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE, "BLACKANDWHITE", NULL, {0, 0, 0, 0} }, - {CV_IMWRITE_PAM_FORMAT_GRAYSCALE, "GRAYSCALE", NULL, {0, 0, 0, 0} }, - {CV_IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA, "GRAYSCALE_ALPHA", NULL, {0, 0, 0, 0} }, - {CV_IMWRITE_PAM_FORMAT_RGB, "RGB", rgb_convert, {0, 1, 2, 0} }, - {CV_IMWRITE_PAM_FORMAT_RGB_ALPHA, "RGB_ALPHA", NULL, {0, 1, 2, 0} }, + {IMWRITE_PAM_FORMAT_NULL, "", NULL, {0, 0, 0, 0} }, + {IMWRITE_PAM_FORMAT_BLACKANDWHITE, "BLACKANDWHITE", NULL, {0, 0, 0, 0} }, + {IMWRITE_PAM_FORMAT_GRAYSCALE, "GRAYSCALE", NULL, {0, 0, 0, 0} }, + {IMWRITE_PAM_FORMAT_GRAYSCALE_ALPHA, "GRAYSCALE_ALPHA", NULL, {0, 0, 0, 0} }, + 
{IMWRITE_PAM_FORMAT_RGB, "RGB", rgb_convert, {0, 1, 2, 0} }, + {IMWRITE_PAM_FORMAT_RGB_ALPHA, "RGB_ALPHA", NULL, {0, 1, 2, 0} }, }; #define PAM_FORMATS_NO (sizeof (fields) / sizeof ((fields)[0])) @@ -341,7 +341,7 @@ PAMDecoder::PAMDecoder() m_offset = -1; m_buf_supported = true; bit_mode = false; - selected_fmt = CV_IMWRITE_PAM_FORMAT_NULL; + selected_fmt = IMWRITE_PAM_FORMAT_NULL; m_maxval = 0; m_channels = 0; m_sampledepth = 0; @@ -462,15 +462,19 @@ bool PAMDecoder::readHeader() if (flds_endhdr && flds_height && flds_width && flds_depth && flds_maxval) { - if (selected_fmt == CV_IMWRITE_PAM_FORMAT_NULL) + if (selected_fmt == IMWRITE_PAM_FORMAT_NULL) { if (m_channels == 1 && m_maxval == 1) - selected_fmt = CV_IMWRITE_PAM_FORMAT_BLACKANDWHITE; + selected_fmt = IMWRITE_PAM_FORMAT_BLACKANDWHITE; else if (m_channels == 1 && m_maxval < 256) - selected_fmt = CV_IMWRITE_PAM_FORMAT_GRAYSCALE; + selected_fmt = IMWRITE_PAM_FORMAT_GRAYSCALE; else if (m_channels == 3 && m_maxval < 256) - selected_fmt = CV_IMWRITE_PAM_FORMAT_RGB; + selected_fmt = IMWRITE_PAM_FORMAT_RGB; + else + CV_Error(Error::StsError, "Can't determine selected_fmt (IMWRITE_PAM_FORMAT_NULL)"); } + CV_CheckDepth(m_sampledepth, m_sampledepth == CV_8U || m_sampledepth == CV_16U, ""); + CV_Check(m_channels, m_channels >= 1 && m_channels <= 4, "Unsupported number of channels"); m_type = CV_MAKETYPE(m_sampledepth, m_channels); m_offset = m_strm.getPos(); @@ -512,7 +516,7 @@ bool PAMDecoder::readData(Mat& img) if( m_offset < 0 || !m_strm.isOpened()) return false; - if (selected_fmt != CV_IMWRITE_PAM_FORMAT_NULL) + if (selected_fmt != IMWRITE_PAM_FORMAT_NULL) fmt = &formats[selected_fmt]; else { /* default layout handling */ @@ -567,6 +571,10 @@ bool PAMDecoder::readData(Mat& img) FillColorRow1( data, src, m_width, palette ); } } + else + { + CV_Error(Error::StsError, cv::format("Unsupported value of target_channels: %d", target_channels)); + } } else { for (int y = 0; y < m_height; y++, data += imp_stride) { @@ 
-662,8 +670,8 @@ bool PAMEncoder::write( const Mat& img, const std::vector& params ) /* parse save file type */ for( size_t i = 0; i < params.size(); i += 2 ) - if( params[i] == CV_IMWRITE_PAM_TUPLETYPE ) { - if ( params[i+1] > CV_IMWRITE_PAM_FORMAT_NULL && + if( params[i] == IMWRITE_PAM_TUPLETYPE ) { + if ( params[i+1] > IMWRITE_PAM_FORMAT_NULL && params[i+1] < (int) PAM_FORMATS_NO) fmt = &formats[params[i+1]]; } diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index 5e7523b203..36cf17e1e3 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -112,6 +112,8 @@ static bool cv_tiffSetErrorHandler() static const char fmtSignTiffII[] = "II\x2a\x00"; static const char fmtSignTiffMM[] = "MM\x00\x2a"; +static const char fmtSignBigTiffII[] = "II\x2b\x00"; +static const char fmtSignBigTiffMM[] = "MM\x00\x2b"; TiffDecoder::TiffDecoder() { @@ -140,13 +142,15 @@ bool TiffDecoder::checkSignature( const String& signature ) const { return signature.size() >= 4 && (memcmp(signature.c_str(), fmtSignTiffII, 4) == 0 || - memcmp(signature.c_str(), fmtSignTiffMM, 4) == 0); + memcmp(signature.c_str(), fmtSignTiffMM, 4) == 0 || + memcmp(signature.c_str(), fmtSignBigTiffII, 4) == 0 || + memcmp(signature.c_str(), fmtSignBigTiffMM, 4) == 0); } int TiffDecoder::normalizeChannelsNumber(int channels) const { - CV_Assert(channels <= 4); - return channels > 4 ? 4 : channels; + CV_Check(channels, channels >= 1 && channels <= 4, "Unsupported number of channels"); + return channels; } ImageDecoder TiffDecoder::newDecoder() const @@ -295,34 +299,53 @@ bool TiffDecoder::readHeader() (ncn != 1 && ncn != 3 && ncn != 4))) bpp = 8; + uint16 sample_format = SAMPLEFORMAT_UINT; + TIFFGetField(tif, TIFFTAG_SAMPLEFORMAT, &sample_format); int wanted_channels = normalizeChannelsNumber(ncn); - switch(bpp) + switch (bpp) { - case 1: - m_type = CV_MAKETYPE(CV_8U, !isGrayScale ? 
wanted_channels : 1); - result = true; - break; - case 8: - //Palette color, the value of the component is used as an index into the red, - //green and blue curves in the ColorMap field to retrieve an RGB triplet that defines the color. - if(photometric == PHOTOMETRIC_PALETTE) - m_type = CV_MAKETYPE(CV_8U, 3); - else - m_type = CV_MAKETYPE(CV_8U, !isGrayScale ? wanted_channels : 1); - result = true; - break; - case 16: - m_type = CV_MAKETYPE(CV_16U, !isGrayScale ? wanted_channels : 1); - result = true; - break; - case 32: - m_type = CV_MAKETYPE(CV_32F, wanted_channels); - result = true; - break; - case 64: - m_type = CV_MAKETYPE(CV_64F, wanted_channels); - result = true; - break; + case 1: + { + CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, ""); + int depth = sample_format == SAMPLEFORMAT_INT ? CV_8S : CV_8U; + m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1); + result = true; + break; + } + case 8: + { + //Palette color, the value of the component is used as an index into the red, + //green and blue curves in the ColorMap field to retrieve an RGB triplet that defines the color. + CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, ""); + int depth = sample_format == SAMPLEFORMAT_INT ? CV_8S : CV_8U; + if (photometric == PHOTOMETRIC_PALETTE) + m_type = CV_MAKETYPE(depth, 3); + else + m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1); + result = true; + break; + } + case 16: + { + CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, ""); + int depth = sample_format == SAMPLEFORMAT_INT ? CV_16S : CV_16U; + m_type = CV_MAKETYPE(depth, !isGrayScale ? wanted_channels : 1); + result = true; + break; + } + case 32: + { + CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_IEEEFP || sample_format == SAMPLEFORMAT_INT, ""); + int depth = sample_format == SAMPLEFORMAT_IEEEFP ? 
CV_32F : CV_32S; + m_type = CV_MAKETYPE(depth, wanted_channels); + result = true; + break; + } + case 64: + CV_CheckEQ((int)sample_format, SAMPLEFORMAT_IEEEFP, ""); + m_type = CV_MAKETYPE(CV_64F, wanted_channels); + result = true; + break; default: CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 16, 32 or 64."); } @@ -432,7 +455,7 @@ bool TiffDecoder::readData( Mat& img ) bool color = img.channels() > 1; - CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, ""); + CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, ""); if (m_width && m_height) { @@ -649,7 +672,7 @@ bool TiffDecoder::readData( Mat& img ) CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, buffer, buffer_size) >= 0); } - Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? CV_32F : CV_64F, ncn), buffer); + Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? 
CV_32S : CV_32F) : CV_64F, ncn), buffer); Rect roi_tile(0, 0, tile_width, tile_height); Rect roi_img(x, img_y, tile_width, tile_height); if (!m_hdr && ncn == 3) @@ -698,7 +721,7 @@ ImageEncoder TiffEncoder::newEncoder() const bool TiffEncoder::isFormatSupported( int depth ) const { - return depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F; + return depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F; } void TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag, @@ -842,7 +865,7 @@ bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vect int width = img.cols, height = img.rows; int type = img.type(); int depth = CV_MAT_DEPTH(type); - CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, ""); + CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, ""); CV_CheckType(type, channels >= 1 && channels <= 4, ""); CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, width)); @@ -865,19 +888,31 @@ bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vect int page_compression = compression; int bitsPerChannel = -1; + uint16 sample_format = SAMPLEFORMAT_INT; switch (depth) { case CV_8U: + sample_format = SAMPLEFORMAT_UINT; + /* FALLTHRU */ + case CV_8S: { bitsPerChannel = 8; break; } + case CV_16U: + sample_format = SAMPLEFORMAT_UINT; + /* FALLTHRU */ + case CV_16S: { bitsPerChannel = 16; break; } + case CV_32F: + sample_format = SAMPLEFORMAT_IEEEFP; + /* FALLTHRU */ + case CV_32S: { bitsPerChannel = 32; page_compression = COMPRESSION_NONE; @@ -887,6 +922,7 @@ bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vect { bitsPerChannel = 64; page_compression = COMPRESSION_NONE; + sample_format = SAMPLEFORMAT_IEEEFP; break; } default: @@ -912,7 +948,7 @@ bool TiffEncoder::writeLibTiff( const 
std::vector& img_vec, const std::vect CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG)); CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, rowsPerStrip)); - CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, depth >= CV_32F ? SAMPLEFORMAT_IEEEFP : SAMPLEFORMAT_UINT)); + CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, sample_format)); if (page_compression != COMPRESSION_NONE) { @@ -1011,7 +1047,7 @@ bool TiffEncoder::write( const Mat& img, const std::vector& params) int type = img.type(); int depth = CV_MAT_DEPTH(type); - CV_CheckType(type, depth == CV_8U || depth == CV_16U || depth == CV_32F || depth == CV_64F, ""); + CV_CheckType(type, depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F, ""); std::vector img_vec; img_vec.push_back(img); diff --git a/modules/imgcodecs/src/grfmt_webp.cpp b/modules/imgcodecs/src/grfmt_webp.cpp index e137b8734d..3860abb64e 100644 --- a/modules/imgcodecs/src/grfmt_webp.cpp +++ b/modules/imgcodecs/src/grfmt_webp.cpp @@ -243,7 +243,7 @@ bool WebPEncoder::write(const Mat& img, const std::vector& params) if (params.size() > 1) { - if (params[0] == CV_IMWRITE_WEBP_QUALITY) + if (params[0] == IMWRITE_WEBP_QUALITY) { comp_lossless = false; quality = static_cast(params[1]); diff --git a/modules/imgcodecs/src/loadsave.cpp b/modules/imgcodecs/src/loadsave.cpp index 91f30cfe98..e9b6d0517c 100644 --- a/modules/imgcodecs/src/loadsave.cpp +++ b/modules/imgcodecs/src/loadsave.cpp @@ -562,7 +562,7 @@ imreadmulti_(const String& filename, int flags, std::vector& mats, int star if ((flags & IMREAD_ANYDEPTH) == 0) type = CV_MAKETYPE(CV_8U, CV_MAT_CN(type)); - if ((flags & CV_LOAD_IMAGE_COLOR) != 0 || + if ((flags & IMREAD_COLOR) != 0 || ((flags & IMREAD_ANYCOLOR) != 0 && CV_MAT_CN(type) > 1)) type = CV_MAKETYPE(CV_MAT_DEPTH(type), 3); else diff --git a/modules/imgcodecs/src/precomp.hpp b/modules/imgcodecs/src/precomp.hpp 
index aa2a999f63..70cc1e7105 100644 --- a/modules/imgcodecs/src/precomp.hpp +++ b/modules/imgcodecs/src/precomp.hpp @@ -43,11 +43,8 @@ #define __IMGCODECS_H_ #include "opencv2/imgcodecs.hpp" -#include "opencv2/imgcodecs/legacy/constants_c.h" - #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" - #include "opencv2/imgproc.hpp" #include diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp index a2f9655c73..1c6e4a6b29 100644 --- a/modules/imgcodecs/test/test_tiff.cpp +++ b/modules/imgcodecs/test/test_tiff.cpp @@ -147,6 +147,26 @@ TEST(Imgcodecs_Tiff, decode_infinite_rowsperstrip) EXPECT_EQ(0, remove(filename.c_str())); } +TEST(Imgcodecs_Tiff, readWrite_unsigned) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenameInput = root + "readwrite/gray_8u.tif"; + const string filenameOutput = cv::tempfile(".tiff"); + const Mat img = cv::imread(filenameInput, IMREAD_UNCHANGED); + ASSERT_FALSE(img.empty()); + ASSERT_EQ(CV_8UC1, img.type()); + + Mat matS8; + img.convertTo(matS8, CV_8SC1); + + ASSERT_TRUE(cv::imwrite(filenameOutput, matS8)); + const Mat img2 = cv::imread(filenameOutput, IMREAD_UNCHANGED); + ASSERT_EQ(img2.type(), matS8.type()); + ASSERT_EQ(img2.size(), matS8.size()); + EXPECT_LE(cvtest::norm(matS8, img2, NORM_INF | NORM_RELATIVE), 1e-3); + EXPECT_EQ(0, remove(filenameOutput.c_str())); +} + TEST(Imgcodecs_Tiff, readWrite_32FC1) { const string root = cvtest::TS::ptr()->get_data_path(); @@ -455,6 +475,29 @@ TEST(Imgcodecs_Tiff, read_multipage_indexed) } } +TEST(Imgcodecs_Tiff, read_bigtiff_images) +{ + const string root = cvtest::TS::ptr()->get_data_path(); + const string filenamesInput[] = { + "readwrite/BigTIFF.tif", + "readwrite/BigTIFFMotorola.tif", + "readwrite/BigTIFFLong.tif", + "readwrite/BigTIFFLong8.tif", + "readwrite/BigTIFFMotorolaLongStrips.tif", + "readwrite/BigTIFFLong8Tiles.tif", + "readwrite/BigTIFFSubIFD4.tif", + "readwrite/BigTIFFSubIFD8.tif" + }; + + for (int 
i = 0; i < 8; i++) + { + const Mat bigtiff_img = imread(root + filenamesInput[i], IMREAD_UNCHANGED); + ASSERT_FALSE(bigtiff_img.empty()); + EXPECT_EQ(64, bigtiff_img.cols); + EXPECT_EQ(64, bigtiff_img.rows); + ASSERT_EQ(CV_8UC3, bigtiff_img.type()); + } +} #endif diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 69b5b0accd..cb7e4d8b98 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -3576,10 +3576,11 @@ a mask and then extract the contour, or copy the region to another image, and so function unless the #FLOODFILL_MASK_ONLY flag is set in the second variant of the function. See the details below. @param mask Operation mask that should be a single-channel 8-bit image, 2 pixels wider and 2 pixels -taller than image. Since this is both an input and output parameter, you must take responsibility -of initializing it. Flood-filling cannot go across non-zero pixels in the input mask. For example, +taller than image. If an empty Mat is passed it will be created automatically. Since this is both an +input and output parameter, you must take responsibility of initializing it. +Flood-filling cannot go across non-zero pixels in the input mask. For example, an edge detector output can be used as a mask to stop filling at edges. On output, pixels in the -mask corresponding to filled pixels in the image are set to 1 or to the a value specified in flags +mask corresponding to filled pixels in the image are set to 1 or to the specified value in flags as described below. Additionally, the function fills the border of the mask with ones to simplify internal processing. It is therefore possible to use the same mask in multiple calls to the function to make sure the filled areas do not overlap. 
@@ -4956,13 +4957,13 @@ CV_EXPORTS_W Rect getTextSize( Size imgsize, const String& text, Point org, FontFace& fface, int size, int weight=0, PutTextFlags flags=PUT_TEXT_ALIGN_LEFT, Range wrap=Range() ); -/** @brief Line iterator -The class is used to iterate over all the pixels on the raster line -segment connecting two specified points. -The class LineIterator is used to get each pixel of a raster line. It -can be treated as versatile implementation of the Bresenham algorithm +/** @brief Class for iterating over all pixels on a raster line segment. + +The class LineIterator is used to get each pixel of a raster line connecting +two specified points. +It can be treated as a versatile implementation of the Bresenham algorithm where you can stop at each pixel and do some extra processing, for example, grab pixel values along the line or draw a line with an effect (for example, with XOR operation). @@ -4991,14 +4992,19 @@ for(int i = 0; i < it2.count; i++, ++it2) class CV_EXPORTS LineIterator { public: - /** @brief initializes the iterator + /** @brief Initializes iterator object for the given line and image. - creates iterators for the line connecting pt1 and pt2 - the line will be clipped on the image boundaries - the line is 8-connected or 4-connected - If leftToRight=true, then the iteration is always done - from the left-most point to the right most, - not to depend on the ordering of pt1 and pt2 parameters; + The returned iterator can be used to traverse all pixels on a line that + connects the given two points. + The line will be clipped on the image boundaries. + + @param img Underlying image. + @param pt1 First endpoint of the line. + @param pt2 The other endpoint of the line. + @param connectivity Pixel connectivity of the iterator. Valid values are 4 (iterator can move + up, down, left and right) and 8 (iterator can also move diagonally). + @param leftToRight If true, the line is traversed from the leftmost endpoint to the rightmost + endpoint. 
Otherwise, the line is traversed from \p pt1 to \p pt2. */ LineIterator( const Mat& img, Point pt1, Point pt2, int connectivity = 8, bool leftToRight = false ) @@ -5031,16 +5037,23 @@ public: } void init(const Mat* img, Rect boundingAreaRect, Point pt1, Point pt2, int connectivity, bool leftToRight); - /** @brief returns pointer to the current pixel + /** @brief Returns pointer to the current pixel. */ uchar* operator *(); - /** @brief prefix increment operator (++it). shifts iterator to the next pixel + + /** @brief Moves iterator to the next pixel on the line. + + This is the prefix version (++it). */ LineIterator& operator ++(); - /** @brief postfix increment operator (it++). shifts iterator to the next pixel + + /** @brief Moves iterator to the next pixel on the line. + + This is the postfix version (it++). */ LineIterator operator ++(int); - /** @brief returns coordinates of the current pixel + + /** @brief Returns coordinates of the current pixel. */ Point pos() const; diff --git a/modules/imgproc/src/connectedcomponents.cpp b/modules/imgproc/src/connectedcomponents.cpp index 1ad74ed38a..f2d41f454d 100644 --- a/modules/imgproc/src/connectedcomponents.cpp +++ b/modules/imgproc/src/connectedcomponents.cpp @@ -1570,7 +1570,7 @@ namespace cv{ #define CONDITION_S img_row[c - 1] > 0 #define CONDITION_X img_row[c] > 0 -#define ACTION_1 // nothing to do +#define ACTION_1 img_labels_row[c] = 0; #define ACTION_2 img_labels_row[c] = label; \ P_[label] = label; \ label = label + 1; @@ -1831,7 +1831,7 @@ namespace cv{ std::vector P_(Plength, 0); LabelT* P = P_.data(); - //P[0] = 0; + P[0] = 0; LabelT lunique = 1; // First scan @@ -1851,7 +1851,7 @@ namespace cv{ #define CONDITION_S img_row[c - 1] > 0 #define CONDITION_X img_row[c] > 0 -#define ACTION_1 // nothing to do +#define ACTION_1 img_labels_row[c] = 0; #define ACTION_2 img_labels_row[c] = lunique; \ P[lunique] = lunique; \ lunique = lunique + 1; // new label diff --git a/modules/imgproc/src/drawing.cpp 
b/modules/imgproc/src/drawing.cpp index ac56791f42..90844c7f51 100644 --- a/modules/imgproc/src/drawing.cpp +++ b/modules/imgproc/src/drawing.cpp @@ -673,7 +673,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color) pt1.y ^= pt2.y & j; x_step = XY_ONE; - y_step = (dy << XY_SHIFT) / (ax | 1); + y_step = dy * (1 << XY_SHIFT) / (ax | 1); ecount = (int)((pt2.x - pt1.x) >> XY_SHIFT); } else @@ -686,7 +686,7 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color) pt2.y ^= pt1.y & i; pt1.y ^= pt2.y & i; - x_step = (dx << XY_SHIFT) / (ay | 1); + x_step = dx * (1 << XY_SHIFT) / (ay | 1); y_step = XY_ONE; ecount = (int)((pt2.y - pt1.y) >> XY_SHIFT); } diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp index 2816795bc6..8595011d48 100644 --- a/modules/imgproc/src/floodfill.cpp +++ b/modules/imgproc/src/floodfill.cpp @@ -477,11 +477,10 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask, nv_buf._[0] = nv_buf._[1] = nv_buf._[2] = nv_buf._[3] = 0; struct { Vec3b b; Vec3i i; Vec3f f; } ld_buf, ud_buf; - Mat img = _image.getMat(), mask; - if( !_mask.empty() ) - mask = _mask.getMat(); - Size size = img.size(); + Mat img = _image.getMat(), mask; + + Size size = img.size(); int type = img.type(); int depth = img.depth(); int cn = img.channels(); @@ -495,6 +494,20 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask, if( connectivity != 0 && connectivity != 4 && connectivity != 8 ) CV_Error( CV_StsBadFlag, "Connectivity must be 4, 0(=4) or 8" ); + if( _mask.empty() ) + { + _mask.create( size.height + 2, size.width + 2, CV_8UC1 ); + _mask.setTo(0); + } + + mask = _mask.getMat(); + CV_CheckTypeEQ( mask.type(), CV_8U, "" ); + CV_CheckEQ( mask.rows, size.height + 2, "" ); + CV_CheckEQ( mask.cols, size.width + 2, "" ); + + Mat mask_inner = mask( Rect(1, 1, mask.cols - 2, mask.rows - 2) ); + copyMakeBorder( mask_inner, mask, 1, 1, 1, 1, BORDER_ISOLATED | BORDER_CONSTANT, Scalar(1) ); + bool is_simple = 
mask.empty() && (flags & FLOODFILL_MASK_ONLY) == 0; for( i = 0; i < cn; i++ ) @@ -544,26 +557,6 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask, } } - if( mask.empty() ) - { - Mat tempMask( size.height + 2, size.width + 2, CV_8UC1 ); - tempMask.setTo(Scalar::all(0)); - mask = tempMask; - } - else - { - CV_Assert( mask.rows == size.height+2 && mask.cols == size.width+2 ); - CV_Assert( mask.type() == CV_8U ); - } - - memset( mask.ptr(), 1, mask.cols ); - memset( mask.ptr(mask.rows-1), 1, mask.cols ); - - for( i = 1; i <= size.height; i++ ) - { - mask.at(i, 0) = mask.at(i, mask.cols-1) = (uchar)1; - } - if( depth == CV_8U ) for( i = 0; i < cn; i++ ) { @@ -632,7 +625,8 @@ int cv::floodFill( InputOutputArray _image, Point seedPoint, { CV_INSTRUMENT_REGION(); - return floodFill(_image, Mat(), seedPoint, newVal, rect, loDiff, upDiff, flags); + Mat mask; + return floodFill(_image, mask, seedPoint, newVal, rect, loDiff, upDiff, flags); } diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index c348828ff0..1fbb9aae51 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -909,7 +909,8 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float* #endif // IPP_DISABLE_HISTOGRAM - https://github.com/opencv/opencv/issues/11544 - if (uniform && (ranges[0][1] - ranges[0][0]) != histSize) + // and https://github.com/opencv/opencv/issues/21595 + if ((uniform && (ranges[0][1] - ranges[0][0]) != histSize) || abs(ranges[0][0]) != cvFloor(ranges[0][0])) return false; Mat ihist = hist; diff --git a/modules/imgproc/test/test_connectedcomponents.cpp b/modules/imgproc/test/test_connectedcomponents.cpp index ed11ea6fda..e1a6b761c7 100644 --- a/modules/imgproc/test/test_connectedcomponents.cpp +++ b/modules/imgproc/test/test_connectedcomponents.cpp @@ -789,5 +789,16 @@ TEST(Imgproc_ConnectedComponents, single_column) } +TEST(Imgproc_ConnectedComponents, 4conn_regression_21366) +{ + 
Mat src = Mat::zeros(Size(10, 10), CV_8UC1); + { + Mat labels, stats, centroids; + EXPECT_NO_THROW(cv::connectedComponentsWithStats(src, labels, stats, centroids, 4)); + } +} + + + } } // namespace diff --git a/modules/imgproc/test/test_floodfill.cpp b/modules/imgproc/test/test_floodfill.cpp index b880c4ee37..934e421fba 100644 --- a/modules/imgproc/test/test_floodfill.cpp +++ b/modules/imgproc/test/test_floodfill.cpp @@ -531,11 +531,11 @@ TEST(Imgproc_FloodFill, maskValue) { const int n = 50; Mat img = Mat::zeros(n, n, CV_8U); - Mat mask = Mat::zeros(n + 2, n + 2, CV_8U); + Mat mask; circle(img, Point(n/2, n/2), 20, Scalar(100), 4); - int flags = 4 + CV_FLOODFILL_MASK_ONLY; + int flags = 4 + FLOODFILL_MASK_ONLY; floodFill(img, mask, Point(n/2 + 13, n/2), Scalar(100), NULL, Scalar(), Scalar(), flags); ASSERT_EQ(1, cvtest::norm(mask.rowRange(1, n-1).colRange(1, n-1), NORM_INF)); diff --git a/modules/imgproc/test/test_histograms.cpp b/modules/imgproc/test/test_histograms.cpp index a6c75a318d..b57af774f2 100644 --- a/modules/imgproc/test/test_histograms.cpp +++ b/modules/imgproc/test/test_histograms.cpp @@ -1993,6 +1993,38 @@ TEST(Imgproc_Hist_Calc, badarg) EXPECT_NO_THROW(cv::calcBackProject(&img, 1, channels, hist, backProj, NULL, 1, true)); } +TEST(Imgproc_Hist_Calc, IPP_ranges_with_equal_exponent_21595) +{ + const int channels[] = { 0 }; + float range1[] = { -0.5f, 1.5f }; + const float* ranges[] = { range1 }; + const int hist_size[] = { 2 }; + + uint8_t m[1][6] = { { 0, 1, 0, 1 , 1, 1 } }; + cv::Mat images_u = Mat(1, 6, CV_8UC1, m); + cv::Mat histogram_u; + cv::calcHist(&images_u, 1, channels, noArray(), histogram_u, 1, hist_size, ranges); + + ASSERT_EQ(histogram_u.at(0), 2.f) << "0 not counts correctly, res: " << histogram_u.at(0); + ASSERT_EQ(histogram_u.at(1), 4.f) << "1 not counts correctly, res: " << histogram_u.at(0); +} + +TEST(Imgproc_Hist_Calc, IPP_ranges_with_nonequal_exponent_21595) +{ + const int channels[] = { 0 }; + float range1[] = { -1.3f, 1.5f }; 
+ const float* ranges[] = { range1 }; + const int hist_size[] = { 3 }; + + uint8_t m[1][6] = { { 0, 1, 0, 1 , 1, 1 } }; + cv::Mat images_u = Mat(1, 6, CV_8UC1, m); + cv::Mat histogram_u; + cv::calcHist(&images_u, 1, channels, noArray(), histogram_u, 1, hist_size, ranges); + + ASSERT_EQ(histogram_u.at(0), 0.f) << "not equal to zero, res: " << histogram_u.at(0); + ASSERT_EQ(histogram_u.at(1), 2.f) << "0 not counts correctly, res: " << histogram_u.at(1); + ASSERT_EQ(histogram_u.at(2), 4.f) << "1 not counts correctly, res: " << histogram_u.at(2); +} }} // namespace /* End Of File */ diff --git a/modules/ml/src/em.cpp b/modules/ml/src/em.cpp index ec73bfd1b5..3e0eeb560a 100644 --- a/modules/ml/src/em.cpp +++ b/modules/ml/src/em.cpp @@ -656,7 +656,7 @@ public: // Update weights // not normalized first - reduce(trainProbs, weights, 0, CV_REDUCE_SUM); + reduce(trainProbs, weights, 0, REDUCE_SUM); // Update means means.create(nclusters, dim, CV_64FC1); diff --git a/modules/ml/test/test_precomp.hpp b/modules/ml/test/test_precomp.hpp index e2d36d2c2d..380e612616 100644 --- a/modules/ml/test/test_precomp.hpp +++ b/modules/ml/test/test_precomp.hpp @@ -4,7 +4,6 @@ #include "opencv2/ts.hpp" #include // EXPECT_MAT_NEAR #include "opencv2/ml.hpp" -#include "opencv2/core/core_c.h" #include using std::ifstream; diff --git a/modules/objdetect/CMakeLists.txt b/modules/objdetect/CMakeLists.txt index 411386fd7d..27480c7078 100644 --- a/modules/objdetect/CMakeLists.txt +++ b/modules/objdetect/CMakeLists.txt @@ -1,5 +1,16 @@ set(the_description "Object Detection") -ocv_define_module(objdetect opencv_core opencv_imgproc opencv_3d opencv_dnn WRAP java objc python js) +ocv_define_module(objdetect + opencv_core + opencv_imgproc + opencv_3d + OPTIONAL + opencv_dnn + WRAP + python + java + objc + js +) if(HAVE_QUIRC) get_property(QUIRC_INCLUDE GLOBAL PROPERTY QUIRC_INCLUDE_DIR) diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 
fa81779f40..13271cebf4 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -49,8 +49,8 @@ /** @defgroup objdetect Object Detection -Haar Feature-based Cascade Classifier for Object Detection ----------------------------------------------------------- +@{ + @defgroup objdetect_cascade_classifier Cascade Classifier for Object Detection The object detector described below has been initially proposed by Paul Viola @cite Viola01 and improved by Rainer Lienhart @cite Lienhart02 . @@ -90,8 +90,7 @@ middle) and the sum of the image pixels under the black stripe multiplied by 3 i compensate for the differences in the size of areas. The sums of pixel values over a rectangular regions are calculated rapidly using integral images (see below and the integral description). -To see the object detector at work, have a look at the facedetect demo: - +Check @ref tutorial_cascade_classifier "the corresponding tutorial" for more details. The following reference is for the detection part only. There is a separate application called opencv_traincascade that can train a cascade of boosted classifiers from a set of samples. @@ -99,10 +98,13 @@ opencv_traincascade that can train a cascade of boosted classifiers from a set o @note In the new C++ interface it is also possible to use LBP (local binary pattern) features in addition to Haar-like features. .. [Viola01] Paul Viola and Michael J. Jones. Rapid Object Detection using a Boosted Cascade of Simple Features. IEEE CVPR, 2001. The paper is available online at - + -@{ - @defgroup objdetect_c C API + @defgroup objdetect_hog HOG (Histogram of Oriented Gradients) descriptor and object detector + @defgroup objdetect_qrcode QRCode detection and encoding + @defgroup objdetect_dnn_face DNN-based face detection and recognition +Check @ref tutorial_dnn_face "the corresponding tutorial" for more details. 
+ @defgroup objdetect_common Common functions and classes @} */ @@ -111,13 +113,15 @@ typedef struct CvHaarClassifierCascade CvHaarClassifierCascade; namespace cv { -//! @addtogroup objdetect +//! @addtogroup objdetect_common //! @{ ///////////////////////////// Object Detection //////////////////////////// -//! class for grouping object candidates, detected by Cascade Classifier, HOG etc. -//! instance of the class is to be passed to cv::partition (see cxoperations.hpp) +/** @brief This class is used for grouping object candidates detected by Cascade Classifier, HOG etc. + +instance of the class is to be passed to cv::partition + */ class CV_EXPORTS SimilarRects { public: @@ -162,6 +166,10 @@ CV_EXPORTS void groupRectangles(std::vector& rectList, std::vector& CV_EXPORTS void groupRectangles_meanshift(std::vector& rectList, std::vector& foundWeights, std::vector& foundScales, double detectThreshold = 0.0, Size winDetSize = Size(64, 128)); +//! @} + +//! @addtogroup objdetect_cascade_classifier +//! @{ template<> struct DefaultDeleter{ CV_EXPORTS void operator ()(CvHaarClassifierCascade* obj) const; }; @@ -243,7 +251,7 @@ public: CV_WRAP bool load( const String& filename ); /** @brief Reads a classifier from a FileStorage node. - @note The file may contain a new cascade classifier (trained traincascade application) only. + @note The file may contain a new cascade classifier (trained by the traincascade application) only. */ CV_WRAP bool read( const FileNode& node ); @@ -260,12 +268,6 @@ public: cvHaarDetectObjects. It is not used for a new cascade. @param minSize Minimum possible object size. Objects smaller than that are ignored. @param maxSize Maximum possible object size. Objects larger than that are ignored. If `maxSize == minSize` model is evaluated on single scale. - - The function is parallelized with the TBB library. 
- - @note - - (Python) A face detection example using cascade classifiers can be found at - opencv_source_code/samples/python/facedetect.py */ CV_WRAP void detectMultiScale( InputArray image, CV_OUT std::vector& objects, @@ -338,7 +340,10 @@ public: }; CV_EXPORTS Ptr createFaceDetectionMaskGenerator(); +//! @} +//! @addtogroup objdetect_hog +//! @{ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// //! struct for detection region of interest (ROI) @@ -378,7 +383,7 @@ public: }; enum DescriptorStorageFormat { DESCR_FORMAT_COL_BY_COL, DESCR_FORMAT_ROW_BY_ROW }; - /**@brief Creates the HOG descriptor and detector with default params. + /**@brief Creates the HOG descriptor and detector with default parameters. aqual to HOGDescriptor(Size(64,128), Size(16,16), Size(8,8), Size(8,8), 9 ) */ @@ -414,6 +419,8 @@ public: {} /** @overload + + Creates the HOG descriptor and detector and loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file. @param filename The file name containing HOGDescriptor properties and coefficients for the linear SVM classifier. */ CV_WRAP HOGDescriptor(const String& filename) @@ -452,19 +459,19 @@ public: */ CV_WRAP virtual void setSVMDetector(InputArray svmdetector); - /** @brief Reads HOGDescriptor parameters from a cv::FileNode. + /** @brief Reads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file node. @param fn File node */ virtual bool read(FileNode& fn); - /** @brief Stores HOGDescriptor parameters in a cv::FileStorage. + /** @brief Stores HOGDescriptor parameters and coefficients for the linear SVM classifier in a file storage. @param fs File storage @param objname Object name */ virtual void write(FileStorage& fs, const String& objname) const; - /** @brief loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file. - @param filename Path of the file to read. 
+ /** @brief loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file + @param filename Name of the file to read. @param objname The optional name of the node to read (if empty, the first top-level node will be used). */ CV_WRAP virtual bool load(const String& filename, const String& objname = String()); @@ -537,13 +544,14 @@ public: @param winStride Window stride. It must be a multiple of block stride. @param padding Padding @param scale Coefficient of the detection window increase. - @param finalThreshold Final threshold + @param groupThreshold Coefficient to regulate the similarity threshold. When detected, some objects can be covered + by many rectangles. 0 means not to perform grouping. @param useMeanshiftGrouping indicates grouping algorithm */ CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, CV_OUT std::vector& foundWeights, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, - double finalThreshold = 2.0,bool useMeanshiftGrouping = false) const; + double groupThreshold = 2.0, bool useMeanshiftGrouping = false) const; /** @brief Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles. @@ -555,13 +563,14 @@ public: @param winStride Window stride. It must be a multiple of block stride. @param padding Padding @param scale Coefficient of the detection window increase. - @param finalThreshold Final threshold + @param groupThreshold Coefficient to regulate the similarity threshold. When detected, some objects can be covered + by many rectangles. 0 means not to perform grouping. 
@param useMeanshiftGrouping indicates grouping algorithm */ virtual void detectMultiScale(InputArray img, CV_OUT std::vector& foundLocations, double hitThreshold = 0, Size winStride = Size(), Size padding = Size(), double scale = 1.05, - double finalThreshold = 2.0, bool useMeanshiftGrouping = false) const; + double groupThreshold = 2.0, bool useMeanshiftGrouping = false) const; /** @brief Computes gradients and quantized gradient orientations. @param img Matrix contains the image to be computed @@ -666,6 +675,10 @@ public: */ void groupRectangles(std::vector& rectList, std::vector& weights, int groupThreshold, double eps) const; }; +//! @} + +//! @addtogroup objdetect_qrcode +//! @{ class CV_EXPORTS_W QRCodeEncoder { protected: @@ -827,7 +840,7 @@ protected: Ptr p; }; -//! @} objdetect +//! @} } #include "opencv2/objdetect/detection_based_tracker.hpp" diff --git a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp index 18cde13eab..fb96c668a5 100644 --- a/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp +++ b/modules/objdetect/include/opencv2/objdetect/detection_based_tracker.hpp @@ -51,7 +51,7 @@ namespace cv { -//! @addtogroup objdetect +//! @addtogroup objdetect_cascade_classifier //! @{ class CV_EXPORTS DetectionBasedTracker @@ -215,7 +215,7 @@ class CV_EXPORTS DetectionBasedTracker void detectInRegion(const cv::Mat& img, const cv::Rect& r, std::vector& detectedObjectsInRegions); }; -//! @} objdetect +//! 
@} } //end of cv namespace diff --git a/modules/objdetect/include/opencv2/objdetect/face.hpp b/modules/objdetect/include/opencv2/objdetect/face.hpp index f2429c5f31..1b3681c652 100644 --- a/modules/objdetect/include/opencv2/objdetect/face.hpp +++ b/modules/objdetect/include/opencv2/objdetect/face.hpp @@ -7,13 +7,15 @@ #include -/** @defgroup dnn_face DNN-based face detection and recognition - */ - namespace cv { -/** @brief DNN-based face detector, model download link: https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx. +//! @addtogroup objdetect_dnn_face +//! @{ + +/** @brief DNN-based face detector + +model download link: https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet */ class CV_EXPORTS_W FaceDetectorYN { @@ -80,7 +82,9 @@ public: int target_id = 0); }; -/** @brief DNN-based face recognizer, model download link: https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view. +/** @brief DNN-based face recognizer + +model download link: https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface */ class CV_EXPORTS_W FaceRecognizerSF { @@ -105,11 +109,11 @@ public: CV_WRAP virtual void feature(InputArray aligned_img, OutputArray face_feature) = 0; /** @brief Calculating the distance between two face features - * @param _face_feature1 the first input feature - * @param _face_feature2 the second input feature of the same size and the same type as _face_feature1 + * @param face_feature1 the first input feature + * @param face_feature2 the second input feature of the same size and the same type as face_feature1 * @param dis_type defining the similarity with optional values "FR_OSINE" or "FR_NORM_L2" */ - CV_WRAP virtual double match(InputArray _face_feature1, InputArray _face_feature2, int dis_type = FaceRecognizerSF::FR_COSINE) const = 0; + CV_WRAP virtual double match(InputArray face_feature1, InputArray face_feature2, int dis_type = FaceRecognizerSF::FR_COSINE) const = 0; /** 
@brief Creates an instance of this class with given parameters * @param model the path of the onnx model used for face recognition @@ -120,6 +124,7 @@ public: CV_WRAP static Ptr create(const String& model, const String& config, int backend_id = 0, int target_id = 0); }; +//! @} } // namespace cv #endif diff --git a/modules/objdetect/src/face_detect.cpp b/modules/objdetect/src/face_detect.cpp index a9ca2d8957..10259a32e6 100644 --- a/modules/objdetect/src/face_detect.cpp +++ b/modules/objdetect/src/face_detect.cpp @@ -6,13 +6,16 @@ #include "opencv2/imgproc.hpp" #include "opencv2/core.hpp" +#ifdef HAVE_OPENCV_DNN #include "opencv2/dnn.hpp" +#endif #include namespace cv { +#ifdef HAVE_OPENCV_DNN class FaceDetectorYNImpl : public FaceDetectorYN { public: @@ -273,6 +276,7 @@ private: std::vector priors; }; +#endif Ptr FaceDetectorYN::create(const String& model, const String& config, @@ -283,7 +287,12 @@ Ptr FaceDetectorYN::create(const String& model, const int backend_id, const int target_id) { +#ifdef HAVE_OPENCV_DNN return makePtr(model, config, input_size, score_threshold, nms_threshold, top_k, backend_id, target_id); +#else + CV_UNUSED(model); CV_UNUSED(config); CV_UNUSED(input_size); CV_UNUSED(score_threshold); CV_UNUSED(nms_threshold); CV_UNUSED(top_k); CV_UNUSED(backend_id); CV_UNUSED(target_id); + CV_Error(cv::Error::StsNotImplemented, "cv::FaceDetectorYN requires enabled 'dnn' module."); +#endif } } // namespace cv diff --git a/modules/objdetect/src/face_recognize.cpp b/modules/objdetect/src/face_recognize.cpp index 66271068b2..497303e42b 100644 --- a/modules/objdetect/src/face_recognize.cpp +++ b/modules/objdetect/src/face_recognize.cpp @@ -4,13 +4,17 @@ #include "precomp.hpp" +#include "opencv2/core.hpp" +#ifdef HAVE_OPENCV_DNN #include "opencv2/dnn.hpp" +#endif #include namespace cv { +#ifdef HAVE_OPENCV_DNN class FaceRecognizerSFImpl : public FaceRecognizerSF { public: @@ -173,10 +177,16 @@ private: private: dnn::Net net; }; +#endif Ptr 
FaceRecognizerSF::create(const String& model, const String& config, int backend_id, int target_id) { +#ifdef HAVE_OPENCV_DNN return makePtr(model, config, backend_id, target_id); +#else + CV_UNUSED(model); CV_UNUSED(config); CV_UNUSED(backend_id); CV_UNUSED(target_id); + CV_Error(cv::Error::StsNotImplemented, "cv::FaceRecognizerSF requires enabled 'dnn' module"); +#endif } } // namespace cv diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp index 281b009558..b57e92ff9a 100644 --- a/modules/objdetect/src/hog.cpp +++ b/modules/objdetect/src/hog.cpp @@ -42,7 +42,6 @@ #include "precomp.hpp" #include "cascadedetect.hpp" -#include "opencv2/core/core_c.h" #include "opencv2/core/hal/intrin.hpp" #include "opencl_kernels_objdetect.hpp" @@ -1887,7 +1886,7 @@ static bool ocl_detectMultiScale(InputArray _img, std::vector &found_locat void HOGDescriptor::detectMultiScale( InputArray _img, std::vector& foundLocations, std::vector& foundWeights, double hitThreshold, Size winStride, Size padding, - double scale0, double finalThreshold, bool useMeanshiftGrouping) const + double scale0, double groupThreshold, bool useMeanshiftGrouping) const { CV_INSTRUMENT_REGION(); @@ -1913,7 +1912,7 @@ void HOGDescriptor::detectMultiScale( CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 && winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(), - ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold, oclSvmDetector, + ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, groupThreshold, oclSvmDetector, blockSize, cellSize, nbins, blockStride, winSize, gammaCorrection, L2HysThreshold, (float)getWinSigma(), free_coef, signedGradient)); std::vector allCandidates; @@ -1934,21 +1933,21 @@ void HOGDescriptor::detectMultiScale( std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights)); if ( 
useMeanshiftGrouping ) - groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize); + groupRectangles_meanshift(foundLocations, foundWeights, foundScales, groupThreshold, winSize); else - groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2); + groupRectangles(foundLocations, foundWeights, (int)groupThreshold, 0.2); clipObjects(imgSize, foundLocations, 0, &foundWeights); } void HOGDescriptor::detectMultiScale(InputArray img, std::vector& foundLocations, double hitThreshold, Size winStride, Size padding, - double scale0, double finalThreshold, bool useMeanshiftGrouping) const + double scale0, double groupThreshold, bool useMeanshiftGrouping) const { CV_INSTRUMENT_REGION(); std::vector foundWeights; detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride, - padding, scale0, finalThreshold, useMeanshiftGrouping); + padding, scale0, groupThreshold, useMeanshiftGrouping); } std::vector HOGDescriptor::getDefaultPeopleDetector() diff --git a/modules/objdetect/test/test_face.cpp b/modules/objdetect/test/test_face.cpp index 2e944c50df..d33032fa2f 100644 --- a/modules/objdetect/test/test_face.cpp +++ b/modules/objdetect/test/test_face.cpp @@ -78,7 +78,7 @@ TEST(Objdetect_face_detection, regression) // } // Initialize detector - std::string model = findDataFile("dnn/onnx/models/yunet-202109.onnx", false); + std::string model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false); Ptr faceDetector = FaceDetectorYN::create(model, "", Size(300, 300)); faceDetector->setScoreThreshold(0.7f); @@ -178,7 +178,7 @@ TEST(Objdetect_face_recognition, regression) } // Initialize detector - std::string detect_model = findDataFile("dnn/onnx/models/yunet-202109.onnx", false); + std::string detect_model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false); Ptr faceDetector = FaceDetectorYN::create(detect_model, "", Size(150, 150), score_thresh, nms_thresh); std::string recog_model = 
findDataFile("dnn/onnx/models/face_recognizer_fast.onnx", false); diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index b39db34fcb..294905c783 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -130,45 +130,155 @@ struct ConstDef long long val; }; -static void init_submodule(PyObject * root, const char * name, PyMethodDef * methods, ConstDef * consts) -{ - // traverse and create nested submodules - std::string s = name; - size_t i = s.find('.'); - while (i < s.length() && i != std::string::npos) - { - size_t j = s.find('.', i); - if (j == std::string::npos) - j = s.length(); - std::string short_name = s.substr(i, j-i); - std::string full_name = s.substr(0, j); - i = j+1; +static inline bool strStartsWith(const std::string& str, const std::string& prefix) { + return prefix.empty() || \ + (str.size() >= prefix.size() && std::memcmp(str.data(), prefix.data(), prefix.size()) == 0); +} - PyObject * d = PyModule_GetDict(root); - PyObject * submod = PyDict_GetItemString(d, short_name.c_str()); - if (submod == NULL) +static inline bool strEndsWith(const std::string& str, char symbol) { + return !str.empty() && str[str.size() - 1] == symbol; +} + +/** + * \brief Creates a submodule of the `root`. Missing parents submodules + * are created as needed. If name equals to parent module name than + * borrowed reference to parent module is returned (no reference counting + * are done). + * Submodule lifetime is managed by the parent module. + * If nested submodules are created than the lifetime is managed by the + * predecessor submodule in a list. + * + * \param parent_module Parent module object. + * \param name Submodule name. + * \return borrowed reference to the created submodule. + * If any of submodules can't be created than NULL is returned. 
+ */ +static PyObject* createSubmodule(PyObject* parent_module, const std::string& name) +{ + if (!parent_module) { - submod = PyImport_AddModule(full_name.c_str()); - PyDict_SetItemString(d, short_name.c_str(), submod); + return PyErr_Format(PyExc_ImportError, + "Bindings generation error. " + "Parent module is NULL during the submodule '%s' creation", + name.c_str() + ); + } + if (strEndsWith(name, '.')) + { + return PyErr_Format(PyExc_ImportError, + "Bindings generation error. " + "Submodule can't end with a dot. Got: %s", name.c_str() + ); } - if (short_name != "") - root = submod; - } + const std::string parent_name = PyModule_GetName(parent_module); - // populate module's dict - PyObject * d = PyModule_GetDict(root); - for (PyMethodDef * m = methods; m->ml_name != NULL; ++m) - { - PyObject * method_obj = PyCFunction_NewEx(m, NULL, NULL); - PyDict_SetItemString(d, m->ml_name, method_obj); - Py_DECREF(method_obj); - } - for (ConstDef * c = consts; c->name != NULL; ++c) - { - PyDict_SetItemString(d, c->name, PyLong_FromLongLong(c->val)); - } + /// Special case handling when caller tries to register a submodule of the parent module with + /// the same name + if (name == parent_name) { + return parent_module; + } + if (!strStartsWith(name, parent_name)) + { + return PyErr_Format(PyExc_ImportError, + "Bindings generation error. " + "Submodule name should always start with a parent module name. " + "Parent name: %s. 
Submodule name: %s", parent_name.c_str(), + name.c_str() + ); + } + + size_t submodule_name_end = name.find('.', parent_name.size() + 1); + /// There is no intermediate submodules in the provided name + if (submodule_name_end == std::string::npos) + { + submodule_name_end = name.size(); + } + + PyObject* submodule = parent_module; + + for (size_t submodule_name_start = parent_name.size() + 1; + submodule_name_start < name.size(); ) + { + const std::string submodule_name = name.substr(submodule_name_start, + submodule_name_end - submodule_name_start); + + const std::string full_submodule_name = name.substr(0, submodule_name_end); + + + PyObject* parent_module_dict = PyModule_GetDict(submodule); + /// If submodule already exists it can be found in the parent module dictionary, + /// otherwise it should be added to it. + submodule = PyDict_GetItemString(parent_module_dict, + submodule_name.c_str()); + if (!submodule) + { + /// Populates global modules dictionary and returns borrowed reference to it + submodule = PyImport_AddModule(full_submodule_name.c_str()); + if (!submodule) + { + /// Return `PyImport_AddModule` NULL with an exception set on failure. + return NULL; + } + /// Populates parent module dictionary. Submodule lifetime should be managed + /// by the global modules dictionary and parent module dictionary, so Py_DECREF after + /// successfull call to the `PyDict_SetItemString` is redundant. 
+ if (PyDict_SetItemString(parent_module_dict, submodule_name.c_str(), submodule) < 0) { + return PyErr_Format(PyExc_ImportError, + "Can't register a submodule '%s' (full name: '%s')", + submodule_name.c_str(), full_submodule_name.c_str() + ); + } + } + + submodule_name_start = submodule_name_end + 1; + + submodule_name_end = name.find('.', submodule_name_start); + if (submodule_name_end == std::string::npos) { + submodule_name_end = name.size(); + } + } + return submodule; +} + +static bool init_submodule(PyObject * root, const char * name, PyMethodDef * methods, ConstDef * consts) +{ + // traverse and create nested submodules + PyObject* submodule = createSubmodule(root, name); + if (!submodule) + { + return false; + } + // populate module's dict + PyObject * d = PyModule_GetDict(submodule); + for (PyMethodDef * m = methods; m->ml_name != NULL; ++m) + { + PyObject * method_obj = PyCFunction_NewEx(m, NULL, NULL); + if (PyDict_SetItemString(d, m->ml_name, method_obj) < 0) + { + PyErr_Format(PyExc_ImportError, + "Can't register function %s in module: %s", m->ml_name, name + ); + Py_CLEAR(method_obj); + return false; + } + Py_DECREF(method_obj); + } + for (ConstDef * c = consts; c->name != NULL; ++c) + { + PyObject* const_obj = PyLong_FromLongLong(c->val); + if (PyDict_SetItemString(d, c->name, const_obj) < 0) + { + PyErr_Format(PyExc_ImportError, + "Can't register constant %s in module %s", c->name, name + ); + Py_CLEAR(const_obj); + return false; + } + Py_DECREF(const_obj); + } + return true; } #include "pyopencv_generated_modules_content.h" @@ -176,7 +286,10 @@ static void init_submodule(PyObject * root, const char * name, PyMethodDef * met static bool init_body(PyObject * m) { #define CVPY_MODULE(NAMESTR, NAME) \ - init_submodule(m, MODULESTR NAMESTR, methods_##NAME, consts_##NAME) + if (!init_submodule(m, MODULESTR NAMESTR, methods_##NAME, consts_##NAME)) \ + { \ + return false; \ + } #include "pyopencv_generated_modules.h" #undef CVPY_MODULE @@ -193,7 +306,13 
@@ static bool init_body(PyObject * m) PyObject* d = PyModule_GetDict(m); - PyDict_SetItemString(d, "__version__", PyString_FromString(CV_VERSION)); + PyObject* version_obj = PyString_FromString(CV_VERSION); + if (PyDict_SetItemString(d, "__version__", version_obj) < 0) { + PyErr_SetString(PyExc_ImportError, "Can't update module version"); + Py_CLEAR(version_obj); + return false; + } + Py_DECREF(version_obj); PyObject *opencv_error_dict = PyDict_New(); PyDict_SetItemString(opencv_error_dict, "file", Py_None); @@ -207,7 +326,18 @@ static bool init_body(PyObject * m) PyDict_SetItemString(d, "error", opencv_error); -#define PUBLISH(I) PyDict_SetItemString(d, #I, PyInt_FromLong(I)) +#define PUBLISH_(I, var_name, type_obj) \ + PyObject* type_obj = PyInt_FromLong(I); \ + if (PyDict_SetItemString(d, var_name, type_obj) < 0) \ + { \ + PyErr_SetString(PyExc_ImportError, "Can't register " var_name " constant"); \ + Py_CLEAR(type_obj); \ + return false; \ + } \ + Py_DECREF(type_obj); + +#define PUBLISH(I) PUBLISH_(I, #I, I ## _obj) + PUBLISH(CV_8U); PUBLISH(CV_8UC1); PUBLISH(CV_8UC2); @@ -243,6 +373,7 @@ static bool init_body(PyObject * m) PUBLISH(CV_64FC2); PUBLISH(CV_64FC3); PUBLISH(CV_64FC4); +#undef PUBLISH_ #undef PUBLISH return true; diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp index 03379ec956..8b44726d5f 100644 --- a/modules/python/src2/pycompat.hpp +++ b/modules/python/src2/pycompat.hpp @@ -231,7 +231,12 @@ PyObject* pyopencv_from(const TYPE& src) ERROR_HANDLER; \ } \ CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \ - PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr); \ + if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + { \ + printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ + Py_DECREF(pyopencv_##NAME##_TypePtr); \ + ERROR_HANDLER; \ + } \ } //================================================================================================== @@ -304,10 
+309,15 @@ PyObject* pyopencv_from(const TYPE& src) pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \ if (!pyopencv_##NAME##_TypePtr) \ { \ - printf("Failed to init: " #WNAME ", base (" #BASE ")" "\n"); \ + printf("Failed to create type from spec: " #WNAME ", base (" #BASE ")\n"); \ + ERROR_HANDLER; \ + } \ + if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + { \ + printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ + Py_DECREF(pyopencv_##NAME##_TypePtr); \ ERROR_HANDLER; \ } \ - PyModule_AddObject(m, #NAME, (PyObject *)pyopencv_##NAME##_TypePtr); \ } // Debug module load: diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 051ac33ac9..48657d595c 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import print_function +import sys import ctypes from functools import partial from collections import namedtuple @@ -607,6 +608,32 @@ class Arguments(NewOpenCVTests): self.assertTrue(isinstance(rr, tuple), msg=type(rrv)) self.assertEqual(len(rr), 3) + def test_nested_function_availability(self): + self.assertTrue(hasattr(cv.utils, "nested"), + msg="Module is not generated for nested namespace") + self.assertTrue(hasattr(cv.utils.nested, "testEchoBooleanFunction"), + msg="Function in nested module is not available") + + if sys.version_info[0] < 3: + # Nested submodule is managed only by the global submodules dictionary + # and parent native module + expected_ref_count = 2 + else: + # Nested submodule is managed by the global submodules dictionary, + # parent native module and Python part of the submodule + expected_ref_count = 3 + + # `getrefcount` temporary increases reference counter by 1 + actual_ref_count = sys.getrefcount(cv.utils.nested) - 1 + + self.assertEqual(actual_ref_count, expected_ref_count, + msg="Nested submodule reference counter has wrong 
value\n" + "Expected: {}. Actual: {}".format(expected_ref_count, actual_ref_count)) + for flag in (True, False): + self.assertEqual(flag, cv.utils.nested.testEchoBooleanFunction(flag), + msg="Function in nested module returns wrong result") + + class CanUsePurePythonModuleFunction(NewOpenCVTests): def test_can_get_ocv_version(self): import sys diff --git a/modules/stitching/src/seam_finders.cpp b/modules/stitching/src/seam_finders.cpp index c5e4cb04ff..0e0c7d1967 100644 --- a/modules/stitching/src/seam_finders.cpp +++ b/modules/stitching/src/seam_finders.cpp @@ -587,8 +587,8 @@ void DpSeamFinder::computeGradients(const Mat &image1, const Mat &image2) bool DpSeamFinder::hasOnlyOneNeighbor(int comp) { std::set >::iterator begin, end; - begin = lower_bound(edges_.begin(), edges_.end(), std::make_pair(comp, std::numeric_limits::min())); - end = upper_bound(edges_.begin(), edges_.end(), std::make_pair(comp, std::numeric_limits::max())); + begin = edges_.lower_bound(std::make_pair(comp, std::numeric_limits::min())); + end = edges_.upper_bound(std::make_pair(comp, std::numeric_limits::max())); return ++begin == end; } diff --git a/modules/ts/include/opencv2/ts/cuda_test.hpp b/modules/ts/include/opencv2/ts/cuda_test.hpp index 53bdbc8a4f..f1851c5f8f 100644 --- a/modules/ts/include/opencv2/ts/cuda_test.hpp +++ b/modules/ts/include/opencv2/ts/cuda_test.hpp @@ -63,6 +63,7 @@ namespace cvtest // GpuMat create cv::cuda::GpuMat createMat(cv::Size size, int type, bool useRoi = false); + cv::cuda::GpuMat createMat(cv::Size size, int type, cv::Size& size0, cv::Point& ofs, bool useRoi = false); cv::cuda::GpuMat loadMat(const cv::Mat& m, bool useRoi = false); ////////////////////////////////////////////////////////////////////// diff --git a/modules/ts/src/cuda_test.cpp b/modules/ts/src/cuda_test.cpp index 3870415f05..a50f2cc3ce 100644 --- a/modules/ts/src/cuda_test.cpp +++ b/modules/ts/src/cuda_test.cpp @@ -91,7 +91,13 @@ namespace cvtest GpuMat createMat(Size size, int type, bool 
useRoi) { - Size size0 = size; + Size size0; Point ofs; + return createMat(size, type, size0, ofs, useRoi); + } + + GpuMat createMat(Size size, int type, Size& size0, Point& ofs, bool useRoi) + { + size0 = size; if (useRoi) { @@ -100,9 +106,10 @@ namespace cvtest } GpuMat d_m(size0, type); - - if (size0 != size) - d_m = d_m(Rect((size0.width - size.width) / 2, (size0.height - size.height) / 2, size.width, size.height)); + if (size0 != size) { + ofs = Point((size0.width - size.width) / 2, (size0.height - size.height) / 2); + d_m = d_m(Rect(ofs, size)); + } return d_m; } diff --git a/modules/videoio/cmake/detect_aravis.cmake b/modules/videoio/cmake/detect_aravis.cmake index e7b3828993..cf8429e5dc 100644 --- a/modules/videoio/cmake/detect_aravis.cmake +++ b/modules/videoio/cmake/detect_aravis.cmake @@ -1,6 +1,6 @@ # --- Aravis SDK --- if(NOT HAVE_ARAVIS_API AND PKG_CONFIG_FOUND) - ocv_check_modules(ARAVIS aravis-0.6 QUIET) + ocv_check_modules(ARAVIS aravis-0.8 QUIET) if(ARAVIS_FOUND) set(HAVE_ARAVIS_API TRUE) endif() @@ -9,9 +9,9 @@ endif() if(NOT HAVE_ARAVIS_API) find_path(ARAVIS_INCLUDE "arv.h" PATHS "${ARAVIS_ROOT}" ENV ARAVIS_ROOT - PATH_SUFFIXES "include/aravis-0.6" + PATH_SUFFIXES "include/aravis-0.8" NO_DEFAULT_PATH) - find_library(ARAVIS_LIBRARY "aravis-0.6" + find_library(ARAVIS_LIBRARY "aravis-0.8" PATHS "${ARAVIS_ROOT}" ENV ARAVIS_ROOT PATH_SUFFIXES "lib" NO_DEFAULT_PATH) diff --git a/modules/videoio/cmake/detect_gstreamer.cmake b/modules/videoio/cmake/detect_gstreamer.cmake index fc6c347383..b2ab06060d 100644 --- a/modules/videoio/cmake/detect_gstreamer.cmake +++ b/modules/videoio/cmake/detect_gstreamer.cmake @@ -44,6 +44,10 @@ if(NOT HAVE_GSTREAMER AND WIN32) NAMES gstvideo gstvideo-1.0 PATHS ${env_paths} PATH_SUFFIXES "lib") + find_library(GSTREAMER_audio_LIBRARY + NAMES gstaudio gstaudio-1.0 + PATHS ${env_paths} + PATH_SUFFIXES "lib") find_library(GSTREAMER_glib_LIBRARY NAMES glib-2.0 @@ -63,6 +67,7 @@ if(NOT HAVE_GSTREAMER AND WIN32) AND 
GSTREAMER_pbutils_LIBRARY AND GSTREAMER_riff_LIBRARY AND GSTREAMER_video_LIBRARY + AND GSTREAMER_audio_LIBRARY AND GSTREAMER_glib_LIBRARY AND GSTREAMER_gobject_LIBRARY) file(STRINGS "${GSTREAMER_gst_INCLUDE_DIR}/gst/gstversion.h" ver_strings REGEX "#define +GST_VERSION_(MAJOR|MINOR|MICRO|NANO).*") @@ -77,6 +82,7 @@ if(NOT HAVE_GSTREAMER AND WIN32) ${GSTREAMER_app_LIBRARY} ${GSTREAMER_riff_LIBRARY} ${GSTREAMER_video_LIBRARY} + ${GSTREAMER_audio_LIBRARY} ${GSTREAMER_pbutils_LIBRARY} ${GSTREAMER_glib_LIBRARY} ${GSTREAMER_gobject_LIBRARY}) diff --git a/modules/videoio/src/cap_aravis.cpp b/modules/videoio/src/cap_aravis.cpp index 1f0e21eb33..49f7789f80 100644 --- a/modules/videoio/src/cap_aravis.cpp +++ b/modules/videoio/src/cap_aravis.cpp @@ -51,8 +51,8 @@ #include // -// This file provides wrapper for using Aravis SDK library to access GigE Vision cameras. -// Aravis library (version 0.4 or 0.6) shall be installed else this code will not be included in build. +// This file provides wrapper for using Aravis SDK library to access GigE and USB 3 Vision cameras. +// Aravis library (version 0.8) shall be installed else this code will not be included in build. 
// // To include this module invoke cmake with -DWITH_ARAVIS=ON // @@ -151,10 +151,6 @@ protected: bool softwareTriggered; // Flag if the camera is software triggered bool allowAutoTrigger; // Flag that user allowed to trigger software triggered cameras automatically - gint64 *pixelFormats; - guint pixelFormatsCnt; - - int num_buffers; // number of payload transmission buffers ArvPixelFormat pixelFormat; // pixel format @@ -225,7 +221,7 @@ bool CvCaptureCAM_Aravis::create( int index ) if(!getDeviceNameById(index, deviceName)) return false; - return NULL != (camera = arv_camera_new(deviceName.c_str())); + return NULL != (camera = arv_camera_new(deviceName.c_str(), NULL)); } bool CvCaptureCAM_Aravis::init_buffers() @@ -234,7 +230,7 @@ bool CvCaptureCAM_Aravis::init_buffers() g_object_unref(stream); stream = NULL; } - if( (stream = arv_camera_create_stream(camera, NULL, NULL)) ) { + if( (stream = arv_camera_create_stream(camera, NULL, NULL, NULL)) ) { if( arv_camera_is_gv_device(camera) ) { g_object_set(stream, "socket-buffer", ARV_GV_STREAM_SOCKET_BUFFER_AUTO, @@ -245,7 +241,7 @@ bool CvCaptureCAM_Aravis::init_buffers() "packet-timeout", (unsigned) 40000, "frame-retention", (unsigned) 200000, NULL); } - payload = arv_camera_get_payload (camera); + payload = arv_camera_get_payload (camera, NULL); for (int i = 0; i < num_buffers; i++) arv_stream_push_buffer(stream, arv_buffer_new(payload, NULL)); @@ -260,25 +256,23 @@ bool CvCaptureCAM_Aravis::open( int index ) { if(create(index)) { // fetch properties bounds - pixelFormats = arv_camera_get_available_pixel_formats(camera, &pixelFormatsCnt); + arv_camera_get_width_bounds(camera, &widthMin, &widthMax, NULL); + arv_camera_get_height_bounds(camera, &heightMin, &heightMax, NULL); + arv_camera_set_region(camera, 0, 0, widthMax, heightMax, NULL); - arv_camera_get_width_bounds(camera, &widthMin, &widthMax); - arv_camera_get_height_bounds(camera, &heightMin, &heightMax); - arv_camera_set_region(camera, 0, 0, widthMax, 
heightMax); - - if( (fpsAvailable = arv_camera_is_frame_rate_available(camera)) ) - arv_camera_get_frame_rate_bounds(camera, &fpsMin, &fpsMax); - if( (gainAvailable = arv_camera_is_gain_available(camera)) ) - arv_camera_get_gain_bounds (camera, &gainMin, &gainMax); - if( (exposureAvailable = arv_camera_is_exposure_time_available(camera)) ) - arv_camera_get_exposure_time_bounds (camera, &exposureMin, &exposureMax); + if( (fpsAvailable = arv_camera_is_frame_rate_available(camera, NULL)) ) + arv_camera_get_frame_rate_bounds(camera, &fpsMin, &fpsMax, NULL); + if( (gainAvailable = arv_camera_is_gain_available(camera, NULL)) ) + arv_camera_get_gain_bounds (camera, &gainMin, &gainMax, NULL); + if( (exposureAvailable = arv_camera_is_exposure_time_available(camera, NULL)) ) + arv_camera_get_exposure_time_bounds (camera, &exposureMin, &exposureMax, NULL); // get initial values - pixelFormat = arv_camera_get_pixel_format(camera); - exposure = exposureAvailable ? arv_camera_get_exposure_time(camera) : 0; - gain = gainAvailable ? arv_camera_get_gain(camera) : 0; - fps = arv_camera_get_frame_rate(camera); - softwareTriggered = (strcmp(arv_camera_get_trigger_source(camera), "Software") == 0); + pixelFormat = arv_camera_get_pixel_format(camera, NULL); + exposure = exposureAvailable ? arv_camera_get_exposure_time(camera, NULL) : 0; + gain = gainAvailable ? 
arv_camera_get_gain(camera, NULL) : 0; + fps = arv_camera_get_frame_rate(camera, NULL); + softwareTriggered = (strcmp(arv_camera_get_trigger_source(camera, NULL), "Software") == 0); return startCapture(); } @@ -295,7 +289,7 @@ bool CvCaptureCAM_Aravis::grabFrame() int max_tries = 10; int tries = 0; if (softwareTriggered && allowAutoTrigger) { - arv_camera_software_trigger (camera); + arv_camera_software_trigger (camera, NULL); } for(; tries < max_tries; tries ++) { arv_buffer = arv_stream_timeout_pop_buffer (stream, 200000); @@ -402,7 +396,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image) if( ng < gain ) { // priority 1 - reduce gain - arv_camera_set_gain(camera, (gain = ng)); + arv_camera_set_gain(camera, (gain = ng), NULL); return; } } @@ -411,7 +405,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image) // priority 2 - control of exposure time if(std::fabs(exposure - ne) > 2) { // we have not yet reach the max-e level - arv_camera_set_exposure_time(camera, (exposure = ne) ); + arv_camera_set_exposure_time(camera, (exposure = ne), NULL); return; } } @@ -420,12 +414,12 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image) if(exposureAvailable) { // exposure at maximum - increase gain if possible if(ng > gain && ng < gainMax && ne >= maxe) { - arv_camera_set_gain(camera, (gain = ng)); + arv_camera_set_gain(camera, (gain = ng), NULL); return; } } else { // priority 3 - increase gain - arv_camera_set_gain(camera, (gain = ng)); + arv_camera_set_gain(camera, (gain = ng), NULL); return; } } @@ -435,7 +429,7 @@ void CvCaptureCAM_Aravis::autoExposureControl(IplImage* image) if(gainAvailable && autoGain && exposureAvailable) { if(gain > gainMin && exposure < maxe) { exposure = CLIP( ne * 1.05, exposureMin, maxe); - arv_camera_set_exposure_time(camera, exposure ); + arv_camera_set_exposure_time(camera, exposure, NULL); } } } @@ -461,25 +455,25 @@ double CvCaptureCAM_Aravis::getProperty( int property_id ) const case 
CV_CAP_PROP_EXPOSURE: if(exposureAvailable) { /* exposure time in seconds, like 1/100 s */ - return arv_camera_get_exposure_time(camera) / 1e6; + return arv_camera_get_exposure_time(camera, NULL) / 1e6; } break; case CV_CAP_PROP_FPS: if(fpsAvailable) { - return arv_camera_get_frame_rate(camera); + return arv_camera_get_frame_rate(camera, NULL); } break; case CV_CAP_PROP_GAIN: if(gainAvailable) { - return arv_camera_get_gain(camera); + return arv_camera_get_gain(camera, NULL); } break; case CV_CAP_PROP_FOURCC: { - ArvPixelFormat currFormat = arv_camera_get_pixel_format(camera); + ArvPixelFormat currFormat = arv_camera_get_pixel_format(camera, NULL); switch( currFormat ) { case ARV_PIXEL_FORMAT_MONO_8: return MODE_Y800; @@ -517,8 +511,8 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value ) case CV_CAP_PROP_AUTO_EXPOSURE: if(exposureAvailable || gainAvailable) { if( (controlExposure = (bool)(int)value) ) { - exposure = exposureAvailable ? arv_camera_get_exposure_time(camera) : 0; - gain = gainAvailable ? arv_camera_get_gain(camera) : 0; + exposure = exposureAvailable ? arv_camera_get_exposure_time(camera, NULL) : 0; + gain = gainAvailable ? 
arv_camera_get_gain(camera, NULL) : 0; } } break; @@ -531,13 +525,13 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value ) /* exposure time in seconds, like 1/100 s */ value *= 1e6; // -> from s to us - arv_camera_set_exposure_time(camera, exposure = CLIP(value, exposureMin, exposureMax)); + arv_camera_set_exposure_time(camera, exposure = CLIP(value, exposureMin, exposureMax), NULL); break; } else return false; case CV_CAP_PROP_FPS: if(fpsAvailable) { - arv_camera_set_frame_rate(camera, fps = CLIP(value, fpsMin, fpsMax)); + arv_camera_set_frame_rate(camera, fps = CLIP(value, fpsMin, fpsMax), NULL); break; } else return false; @@ -546,7 +540,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value ) if ( (autoGain = (-1 == value) ) ) break; - arv_camera_set_gain(camera, gain = CLIP(value, gainMin, gainMax)); + arv_camera_set_gain(camera, gain = CLIP(value, gainMin, gainMax), NULL); break; } else return false; @@ -574,7 +568,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value ) } if(newFormat != pixelFormat) { stopCapture(); - arv_camera_set_pixel_format(camera, pixelFormat = newFormat); + arv_camera_set_pixel_format(camera, pixelFormat = newFormat, NULL); startCapture(); } } @@ -606,7 +600,7 @@ bool CvCaptureCAM_Aravis::setProperty( int property_id, double value ) void CvCaptureCAM_Aravis::stopCapture() { - arv_camera_stop_acquisition(camera); + arv_camera_stop_acquisition(camera, NULL); if(stream) { g_object_unref(stream); @@ -617,8 +611,8 @@ void CvCaptureCAM_Aravis::stopCapture() bool CvCaptureCAM_Aravis::startCapture() { if(init_buffers() ) { - arv_camera_set_acquisition_mode(camera, ARV_ACQUISITION_MODE_CONTINUOUS); - arv_camera_start_acquisition(camera); + arv_camera_set_acquisition_mode(camera, ARV_ACQUISITION_MODE_CONTINUOUS, NULL); + arv_camera_start_acquisition(camera, NULL); return true; } diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 
2188c25444..47dc00ab04 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -980,7 +980,11 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& char* options = getenv("OPENCV_FFMPEG_CAPTURE_OPTIONS"); if(options == NULL) { +#if LIBAVFORMAT_VERSION_MICRO >= 100 && LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(55, 48, 100) + av_dict_set(&dict, "rtsp_flags", "prefer_tcp", 0); +#else av_dict_set(&dict, "rtsp_transport", "tcp", 0); +#endif } else { diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index d78236913b..d3002a5151 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -536,7 +536,7 @@ private: // Destructor is private. Caller should call Release. virtual ~SourceReaderCB() { - CV_LOG_WARNING(NULL, "terminating async callback"); + CV_LOG_INFO(NULL, "terminating async callback"); } public: diff --git a/platforms/apple/build_xcframework.py b/platforms/apple/build_xcframework.py index afea5e4691..49878435d0 100755 --- a/platforms/apple/build_xcframework.py +++ b/platforms/apple/build_xcframework.py @@ -58,7 +58,7 @@ if __name__ == "__main__": macos_archs = "x86_64,arm64" print('Using MacOS ARCHS={}'.format(macos_archs)) - catalyst_archs = args.macos_archs + catalyst_archs = args.catalyst_archs if not catalyst_archs and not args.build_only_specified_archs: # Supply defaults catalyst_archs = "x86_64,arm64" diff --git a/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch b/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch new file mode 100644 index 0000000000..411d5cbd5c --- /dev/null +++ b/platforms/winpack_dldt/2021.4.2/20220118-dldt-fix-msvs-compilation-21469.patch @@ -0,0 +1,12 @@ +diff --git a/inference-engine/src/plugin_api/caseless.hpp b/inference-engine/src/plugin_api/caseless.hpp +index d8ce739..0dd8886 100644 +--- a/inference-engine/src/plugin_api/caseless.hpp ++++ 
b/inference-engine/src/plugin_api/caseless.hpp +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake new file mode 100644 index 0000000000..bc449d05cd --- /dev/null +++ b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig-version.cmake @@ -0,0 +1,29 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(PACKAGE_VERSION_MAJOR 2021) +set(PACKAGE_VERSION_MINOR 4) +set(PACKAGE_VERSION_PATCH 2) +set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}") + +set(PACKAGE_VERSION_EXACT False) +set(PACKAGE_VERSION_COMPATIBLE False) + +# Compatibility with old versioning for 2.x +if(PACKAGE_FIND_VERSION_MAJOR VERSION_EQUAL 2) + set(PACKAGE_VERSION_COMPATIBLE True) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + message(WARNING "Inference Engine versioning has changed. 
Use ${PACKAGE_VERSION} instead of ${PACKAGE_FIND_VERSION}") + endif() +endif() + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT True) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() + +if(PACKAGE_FIND_VERSION_MAJOR EQUAL PACKAGE_VERSION_MAJOR AND + PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION) + set(PACKAGE_VERSION_COMPATIBLE True) +endif() diff --git a/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake new file mode 100644 index 0000000000..d9c9a12de6 --- /dev/null +++ b/platforms/winpack_dldt/2021.4.2/cmake/InferenceEngineConfig.cmake @@ -0,0 +1,31 @@ +# Inference Engine CMake config for OpenCV windows package + +get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) +get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH) + +set(InferenceEngine_LIBRARIES IE::inference_engine) +add_library(IE::inference_engine SHARED IMPORTED) + +set_target_properties(IE::inference_engine PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/deployment_tools/inference_engine/include" +) + +# Import target "IE::inference_engine" for configuration "Debug" +set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) +set_target_properties(IE::inference_engine PROPERTIES + IMPORTED_IMPLIB_DEBUG "${_IMPORT_PREFIX}/deployment_tools/inference_engine/lib/intel64/inference_engined.lib" + IMPORTED_LINK_DEPENDENT_LIBRARIES_DEBUG "" + IMPORTED_LOCATION_DEBUG "${_IMPORT_PREFIX}/bin/inference_engined.dll" + ) + +# Import target "IE::inference_engine" for configuration "Release" +set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(IE::inference_engine PROPERTIES + IMPORTED_IMPLIB_RELEASE 
"${_IMPORT_PREFIX}/deployment_tools/inference_engine/lib/intel64/inference_engine.lib" + IMPORTED_LINK_DEPENDENT_LIBRARIES_RELEASE "" + IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/bin/inference_engine.dll" + ) + +set(InferenceEngine_FOUND ON) diff --git a/platforms/winpack_dldt/2021.4.2/patch.config.py b/platforms/winpack_dldt/2021.4.2/patch.config.py index 7f8715aae2..bd31af236f 100644 --- a/platforms/winpack_dldt/2021.4.2/patch.config.py +++ b/platforms/winpack_dldt/2021.4.2/patch.config.py @@ -2,3 +2,4 @@ applyPatch('20210630-dldt-disable-unused-targets.patch') applyPatch('20210630-dldt-pdb.patch') applyPatch('20210630-dldt-disable-multidevice-autoplugin.patch') applyPatch('20210630-dldt-vs-version.patch') +applyPatch('20220118-dldt-fix-msvs-compilation-21469.patch') diff --git a/platforms/winpack_dldt/2021.4.2/sysroot.config.py b/platforms/winpack_dldt/2021.4.2/sysroot.config.py index fa4281107d..f11e99f843 100644 --- a/platforms/winpack_dldt/2021.4.2/sysroot.config.py +++ b/platforms/winpack_dldt/2021.4.2/sysroot.config.py @@ -1,3 +1,5 @@ +copytree(self.cpath / 'cmake', self.sysrootdir / 'deployment_tools' / 'inference_engine' / 'cmake') + sysroot_bin_dir = prepare_dir(self.sysrootdir / 'bin') copytree(self.build_dir / 'install', self.sysrootdir / 'ngraph') #rm_one(self.sysrootdir / 'ngraph' / 'lib' / 'ngraph.dll') diff --git a/platforms/winpack_dldt/build_package.py b/platforms/winpack_dldt/build_package.py index 88154bafb5..277a13c232 100644 --- a/platforms/winpack_dldt/build_package.py +++ b/platforms/winpack_dldt/build_package.py @@ -388,10 +388,9 @@ class Builder: if self.config.dldt_release: cmake_vars['INF_ENGINE_RELEASE'] = str(self.config.dldt_release) - cmake_vars['INF_ENGINE_LIB_DIRS:PATH'] = str(builderDLDT.sysrootdir / 'deployment_tools/inference_engine/lib/intel64') - assert os.path.exists(cmake_vars['INF_ENGINE_LIB_DIRS:PATH']), cmake_vars['INF_ENGINE_LIB_DIRS:PATH'] - cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH'] = str(builderDLDT.sysrootdir / 
'deployment_tools/inference_engine/include') - assert os.path.exists(cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH']), cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH'] + InferenceEngine_DIR = str(builderDLDT.sysrootdir / 'deployment_tools' / 'inference_engine' / 'cmake') + assert os.path.exists(InferenceEngine_DIR), InferenceEngine_DIR + cmake_vars['InferenceEngine_DIR:PATH'] = InferenceEngine_DIR ngraph_DIR = str(builderDLDT.sysrootdir / 'ngraph/cmake') if not os.path.exists(ngraph_DIR): diff --git a/samples/dnn/face_detect.cpp b/samples/dnn/face_detect.cpp index 161940cb4a..d1e6314969 100644 --- a/samples/dnn/face_detect.cpp +++ b/samples/dnn/face_detect.cpp @@ -44,8 +44,8 @@ int main(int argc, char** argv) "{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}" "{video v | 0 | Path to the input video}" "{scale sc | 1.0 | Scale factor used to resize input video frames}" - "{fd_model fd | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }" - "{fr_model fr | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}" + "{fd_model fd | face_detection_yunet_2021dec.onnx| Path to the model. Download yunet.onnx in https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}" + "{fr_model fr | face_recognition_sface_2021dec.onnx | Path to the face recognition model. 
Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}" "{score_threshold | 0.9 | Filter out faces of score < score_threshold}" "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}" "{top_k | 5000 | Keep top_k bounding boxes before NMS}" @@ -65,6 +65,7 @@ int main(int argc, char** argv) int topK = parser.get("top_k"); bool save = parser.get("save"); + float scale = parser.get("scale"); double cosine_similar_thresh = 0.363; double l2norm_similar_thresh = 1.128; @@ -87,6 +88,9 @@ int main(int argc, char** argv) return 2; } + int imageWidth = int(image1.cols * scale); + int imageHeight = int(image1.rows * scale); + resize(image1, image1, Size(imageWidth, imageHeight)); tm.start(); //! [inference] @@ -199,7 +203,6 @@ int main(int argc, char** argv) else { int frameWidth, frameHeight; - float scale = parser.get("scale"); VideoCapture capture; std::string video = parser.get("video"); if (video.size() == 1 && isdigit(video[0])) diff --git a/samples/dnn/face_detect.py b/samples/dnn/face_detect.py index 8900a7f7ad..9cf38b5d5f 100644 --- a/samples/dnn/face_detect.py +++ b/samples/dnn/face_detect.py @@ -16,8 +16,8 @@ parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.') parser.add_argument('--video', '-v', type=str, help='Path to the input video.') parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.') -parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. 
Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.') -parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.') +parser.add_argument('--face_detection_model', '-fd', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the face detection model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet') +parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface') parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.') parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.') parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.') @@ -56,11 +56,15 @@ if __name__ == '__main__': # If input is an image if args.image1 is not None: img1 = cv.imread(cv.samples.findFile(args.image1)) + img1Width = int(img1.shape[1]*args.scale) + img1Height = int(img1.shape[0]*args.scale) + img1 = cv.resize(img1, (img1Width, img1Height)) tm.start() + ## [inference] # Set input size before inference - detector.setInputSize((img1.shape[1], img1.shape[0])) + detector.setInputSize((img1Width, img1Height)) faces1 = detector.detect(img1) ## [inference] diff --git a/samples/dnn/text_detection.py b/samples/dnn/text_detection.py index 6fb1e90901..db0ea197bd 100644 --- a/samples/dnn/text_detection.py +++ b/samples/dnn/text_detection.py @@ -195,7 +195,7 @@ def main(): indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold) for 
i in indices: # get 4 corners of the rotated rect - vertices = cv.boxPoints(boxes[i[0]]) + vertices = cv.boxPoints(boxes[i]) # scale the bounding box coordinates based on the respective ratios for j in range(4): vertices[j][0] *= rW diff --git a/samples/python/camera_calibration_show_extrinsics.py b/samples/python/camera_calibration_show_extrinsics.py index d676691f15..0ee2a19b68 100755 --- a/samples/python/camera_calibration_show_extrinsics.py +++ b/samples/python/camera_calibration_show_extrinsics.py @@ -1,5 +1,18 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- + +''' +Plot camera calibration extrinsics. + +usage: + camera_calibration_show_extrinsics.py [--calibration ] [--cam_width] [--cam_height] [--scale_focal] [--patternCentric ] + +default values: + --calibration : left_intrinsics.yml + --cam_width : 0.064/2 + --cam_height : 0.048/2 + --scale_focal : 40 + --patternCentric : True +''' # Python 2/3 compatibility from __future__ import print_function diff --git a/samples/python/common.py b/samples/python/common.py index 85cda62cd4..e7ad478b88 100755 --- a/samples/python/common.py +++ b/samples/python/common.py @@ -222,7 +222,7 @@ def mosaic(w, imgs): pad = np.zeros_like(img0) imgs = it.chain([img0], imgs) rows = grouper(w, imgs, pad) - return np.vstack(map(np.hstack, rows)) + return np.vstack(list(map(np.hstack, rows))) def getsize(img): h, w = img.shape[:2] diff --git a/samples/python/digits.py b/samples/python/digits.py index e5d8ceb59a..25db411f94 100755 --- a/samples/python/digits.py +++ b/samples/python/digits.py @@ -191,3 +191,4 @@ if __name__ == '__main__': model.save('digits_svm.dat') cv.waitKey(0) + cv.destroyAllWindows() diff --git a/samples/python/digits_video.py b/samples/python/digits_video.py index 692da91219..17f44c333d 100755 --- a/samples/python/digits_video.py +++ b/samples/python/digits_video.py @@ -29,7 +29,7 @@ def main(): src = sys.argv[1] except: src = 0 - cap = video.create_capture(src) + cap = video.create_capture(src, 
fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('sudoku.png'))) classifier_fn = 'digits_svm.dat' if not os.path.exists(classifier_fn): diff --git a/samples/python/facedetect.py b/samples/python/facedetect.py index 488c92d5e5..248206a7cd 100755 --- a/samples/python/facedetect.py +++ b/samples/python/facedetect.py @@ -39,13 +39,13 @@ def main(): except: video_src = 0 args = dict(args) - cascade_fn = args.get('--cascade', "data/haarcascades/haarcascade_frontalface_alt.xml") - nested_fn = args.get('--nested-cascade', "data/haarcascades/haarcascade_eye.xml") + cascade_fn = args.get('--cascade', "haarcascades/haarcascade_frontalface_alt.xml") + nested_fn = args.get('--nested-cascade', "haarcascades/haarcascade_eye.xml") cascade = cv.CascadeClassifier(cv.samples.findFile(cascade_fn)) nested = cv.CascadeClassifier(cv.samples.findFile(nested_fn)) - cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg'))) + cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('lena.jpg'))) while True: _ret, img = cam.read() diff --git a/samples/python/qrcode.py b/samples/python/qrcode.py index b3253f96c6..21b1a59073 100644 --- a/samples/python/qrcode.py +++ b/samples/python/qrcode.py @@ -245,4 +245,6 @@ def main(): if __name__ == '__main__': + print(__doc__) main() + cv.destroyAllWindows() diff --git a/samples/python/stitching_detailed.py b/samples/python/stitching_detailed.py index 316af979c2..3e0ec50acd 100644 --- a/samples/python/stitching_detailed.py +++ b/samples/python/stitching_detailed.py @@ -246,9 +246,9 @@ def get_matcher(args): if matcher_type == "affine": matcher = cv.detail_AffineBestOf2NearestMatcher(False, try_cuda, match_conf) elif range_width == -1: - matcher = cv.detail.BestOf2NearestMatcher_create(try_cuda, match_conf) + matcher = cv.detail_BestOf2NearestMatcher(try_cuda, match_conf) else: - matcher = cv.detail.BestOf2NearestRangeMatcher_create(range_width, 
try_cuda, match_conf) + matcher = cv.detail_BestOf2NearestRangeMatcher(range_width, try_cuda, match_conf) return matcher diff --git a/samples/python/text_skewness_correction.py b/samples/python/text_skewness_correction.py index c8ee33b39d..c3e97a333b 100644 --- a/samples/python/text_skewness_correction.py +++ b/samples/python/text_skewness_correction.py @@ -15,7 +15,7 @@ import argparse def main(): parser = argparse.ArgumentParser() - parser.add_argument("-i", "--image", required=True, help="path to input image file") + parser.add_argument("-i", "--image", default="imageTextR.png", help="path to input image file") args = vars(parser.parse_args()) # load the image from disk @@ -37,9 +37,9 @@ def main(): coords = cv.findNonZero(thresh) angle = cv.minAreaRect(coords)[-1] # the `cv.minAreaRect` function returns values in the - # range [-90, 0) if the angle is less than -45 we need to add 90 to it - if angle < -45: - angle = (90 + angle) + # range [0, 90) if the angle is more than 45 we need to subtract 90 from it + if angle > 45: + angle = (angle - 90) (h, w) = image.shape[:2] center = (w // 2, h // 2) @@ -55,4 +55,6 @@ def main(): if __name__ == "__main__": + print(__doc__) main() + cv.destroyAllWindows() diff --git a/samples/python/tracker.py b/samples/python/tracker.py index 753e166ad8..3b04c57e8a 100644 --- a/samples/python/tracker.py +++ b/samples/python/tracker.py @@ -1,5 +1,4 @@ #!/usr/bin/env python - ''' Tracker demo @@ -36,43 +35,49 @@ class App(object): def __init__(self, args): self.args = args + self.trackerAlgorithm = args.tracker_algo + self.tracker = self.createTracker() - def initializeTracker(self, image, trackerAlgorithm): + def createTracker(self): + if self.trackerAlgorithm == 'mil': + tracker = cv.TrackerMIL_create() + elif self.trackerAlgorithm == 'goturn': + params = cv.TrackerGOTURN_Params() + params.modelTxt = self.args.goturn + params.modelBin = self.args.goturn_model + tracker = cv.TrackerGOTURN_create(params) + elif self.trackerAlgorithm == 
'dasiamrpn': + params = cv.TrackerDaSiamRPN_Params() + params.model = self.args.dasiamrpn_net + params.kernel_cls1 = self.args.dasiamrpn_kernel_cls1 + params.kernel_r1 = self.args.dasiamrpn_kernel_r1 + tracker = cv.TrackerDaSiamRPN_create(params) + else: + sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn.".format(self.trackerAlgorithm)) + return tracker + + def initializeTracker(self, image): while True: - if trackerAlgorithm == 'mil': - tracker = cv.TrackerMIL_create() - elif trackerAlgorithm == 'goturn': - params = cv.TrackerGOTURN_Params() - params.modelTxt = self.args.goturn - params.modelBin = self.args.goturn_model - tracker = cv.TrackerGOTURN_create(params) - elif trackerAlgorithm == 'dasiamrpn': - params = cv.TrackerDaSiamRPN_Params() - params.model = self.args.dasiamrpn_net - params.kernel_cls1 = self.args.dasiamrpn_kernel_cls1 - params.kernel_r1 = self.args.dasiamrpn_kernel_r1 - tracker = cv.TrackerDaSiamRPN_create(params) - else: - sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn.".format(trackerAlgorithm)) - print('==> Select object ROI for tracker ...') bbox = cv.selectROI('tracking', image) print('ROI: {}'.format(bbox)) + if bbox[2] <= 0 or bbox[3] <= 0: + sys.exit("ROI selection cancelled. Exiting...") try: - tracker.init(image, bbox) + self.tracker.init(image, bbox) except Exception as e: print('Unable to initialize tracker with requested bounding box. 
Is there any object?') print(e) print('Try again ...') continue - return tracker + return def run(self): videoPath = self.args.input - trackerAlgorithm = self.args.tracker_algo - camera = create_capture(videoPath, presets['cube']) + print('Using video: {}'.format(videoPath)) + camera = create_capture(cv.samples.findFileOrKeep(videoPath), presets['cube']) if not camera.isOpened(): sys.exit("Can't open video stream: {}".format(videoPath)) @@ -82,7 +87,7 @@ class App(object): assert image is not None cv.namedWindow('tracking') - tracker = self.initializeTracker(image, trackerAlgorithm) + self.initializeTracker(image) print("==> Tracking is started. Press 'SPACE' to re-initialize tracker or 'ESC' for exit...") @@ -92,7 +97,7 @@ class App(object): print("Can't read frame") break - ok, newbox = tracker.update(image) + ok, newbox = self.tracker.update(image) #print(ok, newbox) if ok: @@ -101,7 +106,7 @@ class App(object): cv.imshow("tracking", image) k = cv.waitKey(1) if k == 32: # SPACE - tracker = self.initializeTracker(image) + self.initializeTracker(image) if k == 27: # ESC break @@ -112,22 +117,13 @@ if __name__ == '__main__': print(__doc__) parser = argparse.ArgumentParser(description="Run tracker") parser.add_argument("--input", type=str, default="vtest.avi", help="Path to video source") - parser.add_argument("--tracker_algo", type=str, default="mil", help="One of three available tracking algorithms: mil, goturn, dasiamrpn") + parser.add_argument("--tracker_algo", type=str, default="mil", help="One of available tracking algorithms: mil, goturn, dasiamrpn") parser.add_argument("--goturn", type=str, default="goturn.prototxt", help="Path to GOTURN architecture") parser.add_argument("--goturn_model", type=str, default="goturn.caffemodel", help="Path to GOTERN model") parser.add_argument("--dasiamrpn_net", type=str, default="dasiamrpn_model.onnx", help="Path to onnx model of DaSiamRPN net") parser.add_argument("--dasiamrpn_kernel_r1", type=str, 
default="dasiamrpn_kernel_r1.onnx", help="Path to onnx model of DaSiamRPN kernel_r1") parser.add_argument("--dasiamrpn_kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Path to onnx model of DaSiamRPN kernel_cls1") - parser.add_argument("--dasiamrpn_backend", type=int, default=0, help="Choose one of computation backends:\ - 0: automatically (by default),\ - 1: Halide language (http://halide-lang.org/),\ - 2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit),\ - 3: OpenCV implementation") - parser.add_argument("--dasiamrpn_target", type=int, default=0, help="Choose one of target computation devices:\ - 0: CPU target (by default),\ - 1: OpenCL,\ - 2: OpenCL fp16 (half-float precision),\ - 3: VPU") + args = parser.parse_args() App(args).run() cv.destroyAllWindows()