From b91e701f353f9891e19a3ecc8620584bc1b41139 Mon Sep 17 00:00:00 2001 From: ZhangYin Date: Tue, 29 Sep 2020 14:15:11 +0800 Subject: [PATCH 001/422] modified rvv option for clang to match LLVM upstream --- platforms/linux/riscv64-clang.toolchain.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platforms/linux/riscv64-clang.toolchain.cmake b/platforms/linux/riscv64-clang.toolchain.cmake index 58e06ddf7b..c1c74ab9df 100644 --- a/platforms/linux/riscv64-clang.toolchain.cmake +++ b/platforms/linux/riscv64-clang.toolchain.cmake @@ -17,8 +17,8 @@ set(CMAKE_ASM_COMPILER_TARGET ${CLANG_TARGET_TRIPLE}) # Don't run the linker on compiler check set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -set(CMAKE_C_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") +set(CMAKE_C_FLAGS "-march=rv64gcv0p9 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "-march=rv64gcv0p9 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT}) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) From 12b8d542b7465f495681d3dc0c50cd27f7e0ee94 Mon Sep 17 00:00:00 2001 From: Jojo R Date: Tue, 17 Nov 2020 15:29:03 +0800 Subject: [PATCH 002/422] norm.cpp(normL2Sqr_): improve performance of pipeline The most of target machine use one type cpu unit resource to execute some one type of instruction, e.g. all vx_load API use load/store cpu unit, and v_muladd API use mul/mula cpu unit, we interleave vx_load and v_muladd to improve performance on most targets like RISCV or ARM. 
--- modules/core/src/norm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 9aaed8e980..b95cd99bd8 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -152,10 +152,10 @@ float normL2Sqr_(const float* a, const float* b, int n) { v_float32 t0 = vx_load(a + j) - vx_load(b + j); v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes); - v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes); - v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes); v_d0 = v_muladd(t0, t0, v_d0); + v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes); v_d1 = v_muladd(t1, t1, v_d1); + v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes); v_d2 = v_muladd(t2, t2, v_d2); v_d3 = v_muladd(t3, t3, v_d3); } From 36d771affc952abb30bd2eb8c9d610223be5a68f Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 26 Nov 2020 12:24:15 +0000 Subject: [PATCH 003/422] python: restore sys.path in bootstrap() - multiprocessing need to start from bootstrap code - loading may fail due to missing os.add_dll_directory() calls --- modules/python/package/cv2/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/python/package/cv2/__init__.py b/modules/python/package/cv2/__init__.py index d367998b0b..940ac65732 100644 --- a/modules/python/package/cv2/__init__.py +++ b/modules/python/package/cv2/__init__.py @@ -18,6 +18,10 @@ except ImportError: def bootstrap(): import sys + + import copy + save_sys_path = copy.copy(sys.path) + if hasattr(sys, 'OpenCV_LOADER'): print(sys.path) raise ImportError('ERROR: recursion is detected during loading of "cv2" binary extensions. 
Check OpenCV installation.') @@ -85,6 +89,8 @@ def bootstrap(): del sys.modules['cv2'] import cv2 + sys.path = save_sys_path # multiprocessing should start from bootstrap code (https://github.com/opencv/opencv/issues/18502) + try: import sys del sys.OpenCV_LOADER From 77b986c7a16721146edc51153502c38428d98a6f Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 27 Nov 2020 13:01:54 +0000 Subject: [PATCH 004/422] apple/build_xcframework.py: python syntax - make happy old Python linters --- platforms/apple/build_xcframework.py | 29 +++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/platforms/apple/build_xcframework.py b/platforms/apple/build_xcframework.py index 669d798ae4..e8a77d230f 100755 --- a/platforms/apple/build_xcframework.py +++ b/platforms/apple/build_xcframework.py @@ -10,13 +10,14 @@ from cv_build_utils import execute, print_error, print_header, get_xcode_version if __name__ == "__main__": # Check for dependencies - assert sys.version_info >= (3, 6), f"Python 3.6 or later is required! Current version is {sys.version_info}" + assert sys.version_info >= (3, 6), "Python 3.6 or later is required! Current version is {}".format(sys.version_info) # Need CMake 3.18.5/3.19 or later for a Silicon-related fix to building for the iOS Simulator. # See https://gitlab.kitware.com/cmake/cmake/-/issues/21425 for context. - assert get_cmake_version() >= (3, 18, 5), f"CMake 3.18.5 or later is required. Current version is {get_cmake_version()}" + assert get_cmake_version() >= (3, 18, 5), "CMake 3.18.5 or later is required. Current version is {}".format(get_cmake_version()) # Need Xcode 12.2 for Apple Silicon support - assert get_xcode_version() >= (12, 2), f"Xcode 12.2 command line tools or later are required! Current version is {get_xcode_version()}. \ - Run xcode-select to switch if you have multiple Xcode installs." + assert get_xcode_version() >= (12, 2), \ + "Xcode 12.2 command line tools or later are required! 
Current version is {}. ".format(get_xcode_version()) + \ + "Run xcode-select to switch if you have multiple Xcode installs." # Parse arguments description = """ @@ -36,32 +37,32 @@ if __name__ == "__main__": args, unknown_args = parser.parse_known_args() if unknown_args: - print(f"The following args are not recognized by this script and will be passed through to the ios/osx build_framework.py scripts: {unknown_args}") + print("The following args are not recognized by this script and will be passed through to the ios/osx build_framework.py scripts: {}".format(unknown_args)) # Parse architectures from args iphoneos_archs = args.iphoneos_archs if not iphoneos_archs and not args.build_only_specified_archs: # Supply defaults iphoneos_archs = "armv7,arm64" - print(f'Using iPhoneOS ARCHS={iphoneos_archs}') + print('Using iPhoneOS ARCHS={}'.format(iphoneos_archs)) iphonesimulator_archs = args.iphonesimulator_archs if not iphonesimulator_archs and not args.build_only_specified_archs: # Supply defaults iphonesimulator_archs = "x86_64,arm64" - print(f'Using iPhoneSimulator ARCHS={iphonesimulator_archs}') + print('Using iPhoneSimulator ARCHS={}'.format(iphonesimulator_archs)) macos_archs = args.macos_archs if not macos_archs and not args.build_only_specified_archs: # Supply defaults macos_archs = "x86_64,arm64" - print(f'Using MacOS ARCHS={macos_archs}') + print('Using MacOS ARCHS={}'.format(macos_archs)) catalyst_archs = args.macos_archs if not catalyst_archs and not args.build_only_specified_archs: # Supply defaults catalyst_archs = "x86_64,arm64" - print(f'Using Catalyst ARCHS={catalyst_archs}') + print('Using Catalyst ARCHS={}'.format(catalyst_archs)) # Build phase @@ -73,7 +74,7 @@ if __name__ == "__main__": build_folders = [] def get_or_create_build_folder(base_dir, platform): - build_folder = f"./{base_dir}/{platform}".replace(" ", "\\ ") # Escape spaces in output path + build_folder = "./{}/{}".format(base_dir, platform).replace(" ", "\\ ") # Escape spaces in output 
path pathlib.Path(build_folder).mkdir(parents=True, exist_ok=True) return build_folder @@ -105,18 +106,20 @@ if __name__ == "__main__": # Put all the built .frameworks together into a .xcframework print_header("Building xcframework") + + framework_path = "{}/{}.xcframework".format(args.out, args.framework_name) xcframework_build_command = [ "xcodebuild", "-create-xcframework", "-output", - f"{args.out}/{args.framework_name}.xcframework", + framework_path, ] for folder in build_folders: - xcframework_build_command += ["-framework", f"{folder}/{args.framework_name}.framework"] + xcframework_build_command += ["-framework", "{}/{}.framework".format(folder, args.framework_name)] execute(xcframework_build_command, cwd=os.getcwd()) print("") - print_header(f"Finished building {args.out}/{args.framework_name}.xcframework") + print_header("Finished building {}".format(framework_path)) except Exception as e: print_error(e) traceback.print_exc(file=sys.stderr) From da2978f607e4566601d3e86a873f46178b090172 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 28 Nov 2020 13:13:28 +0000 Subject: [PATCH 005/422] ts: cvtest::debugLevel / --test_debug= option --- modules/calib3d/test/test_cornerssubpix.cpp | 7 ++++--- modules/calib3d/test/test_fisheye.cpp | 6 ++++-- modules/imgproc/test/test_drawing.cpp | 3 ++- modules/ts/include/opencv2/ts/ts_ext.hpp | 1 + modules/ts/src/ts.cpp | 10 ++++++++++ 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modules/calib3d/test/test_cornerssubpix.cpp b/modules/calib3d/test/test_cornerssubpix.cpp index 05b75c5cbc..b70cc1e988 100644 --- a/modules/calib3d/test/test_cornerssubpix.cpp +++ b/modules/calib3d/test/test_cornerssubpix.cpp @@ -153,9 +153,8 @@ void CV_ChessboardSubpixelTest::run( int ) vector test_corners; bool result = findChessboardCorners(chessboard_image, pattern_size, test_corners, 15); - if(!result) + if (!result && cvtest::debugLevel > 0) { -#if 0 ts->printf(cvtest::TS::LOG, "Warning: chessboard was not detected! 
Writing image to test.png\n"); ts->printf(cvtest::TS::LOG, "Size = %d, %d\n", pattern_size.width, pattern_size.height); ts->printf(cvtest::TS::LOG, "Intrinsic params: fx = %f, fy = %f, cx = %f, cy = %f\n", @@ -167,7 +166,9 @@ void CV_ChessboardSubpixelTest::run( int ) distortion_coeffs_.at(0, 4)); imwrite("test.png", chessboard_image); -#endif + } + if (!result) + { continue; } diff --git a/modules/calib3d/test/test_fisheye.cpp b/modules/calib3d/test/test_fisheye.cpp index eedc2fa4fe..8e509cf35e 100644 --- a/modules/calib3d/test/test_fisheye.cpp +++ b/modules/calib3d/test/test_fisheye.cpp @@ -449,7 +449,10 @@ TEST_F(fisheyeTest, stereoRectify) << "Q =" << std::endl << Q << std::endl; } -#if 1 // Debug code + if (cvtest::debugLevel == 0) + return; + // DEBUG code is below + cv::Mat lmapx, lmapy, rmapx, rmapy; //rewrite for fisheye cv::fisheye::initUndistortRectifyMap(K1, D1, R1, P1, requested_size, CV_32F, lmapx, lmapy); @@ -482,7 +485,6 @@ TEST_F(fisheyeTest, stereoRectify) cv::imwrite(cv::format("fisheye_rectification_AB_%03d.png", i), rectification); } -#endif } TEST_F(fisheyeTest, stereoCalibrate) diff --git a/modules/imgproc/test/test_drawing.cpp b/modules/imgproc/test/test_drawing.cpp index fab2631041..42aa386b5a 100644 --- a/modules/imgproc/test/test_drawing.cpp +++ b/modules/imgproc/test/test_drawing.cpp @@ -487,7 +487,8 @@ protected: img->copyTo(sub); shift += img->size().height + 1; } - //imwrite("/tmp/all_fonts.png", result); + if (cvtest::debugLevel > 0) + imwrite("all_fonts.png", result); } }; diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index b2a4cac241..5c09b569a5 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -13,6 +13,7 @@ void checkIppStatus(); extern bool skipUnstableTests; extern bool runBigDataTests; extern int testThreads; +extern int debugLevel; //< 0 - no debug, 1 - basic test debug information, >1 - extra debug information void 
testSetUp(); void testTearDown(); diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index bad799dc4d..b66779c829 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -774,6 +774,7 @@ static bool checkTestData = cv::utils::getConfigurationParameterBool("OPENCV_TES bool skipUnstableTests = false; bool runBigDataTests = false; int testThreads = 0; +int debugLevel = (int)cv::utils::getConfigurationParameterSizeT("OPENCV_TEST_DEBUG", 0); static size_t memory_usage_base = 0; @@ -883,6 +884,7 @@ void parseCustomOptions(int argc, char **argv) "{ test_threads |-1 |the number of worker threads, if parallel execution is enabled}" "{ skip_unstable |false |skip unstable tests }" "{ test_bigdata |false |run BigData tests (>=2Gb) }" + "{ test_debug | |0 - no debug (default), 1 - basic test debug information, >1 - extra debug information }" "{ test_require_data |") + (checkTestData ? "true" : "false") + string("|fail on missing non-required test data instead of skip (env:OPENCV_TEST_REQUIRE_DATA)}" CV_TEST_TAGS_PARAMS "{ h help |false |print help info }" @@ -909,6 +911,14 @@ void parseCustomOptions(int argc, char **argv) skipUnstableTests = parser.get("skip_unstable"); runBigDataTests = parser.get("test_bigdata"); + if (parser.has("test_debug")) + { + cv::String s = parser.get("test_debug"); + if (s.empty() || s == "true") + debugLevel = 1; + else + debugLevel = parser.get("test_debug"); + } if (parser.has("test_require_data")) checkTestData = parser.get("test_require_data"); From 4e4458416d4140dea509d669a8605650ac0ff314 Mon Sep 17 00:00:00 2001 From: Zhiming-Zeng <1773677072@qq.com> Date: Sun, 29 Nov 2020 18:09:42 +0800 Subject: [PATCH 006/422] Merge pull request #18064 from akineeic:gsoc_2020_dnn [GSoC] Develop OpenCV.js DNN modules for promising web use cases together with their tutorials * [Opencv.js doc] Init commit to add image classification example in opencv.js tutorial * [Opencv.js doc] Make the code snippet interactive and put the functions into 
code snippet. * Fix the utils.loadOpenCv for promise module * [Opencv.js doc] Code modify and fixed layout issue. * [Opencv.js doc] Add a JSON file to store parameters for models and show in the web page. * [Opencv.js doc] Change let to const. * [Opencv.js doc] Init commit to add image classification example with camera in opencv.js tutorial * [Opencv.js doc] Init commit to add semantic segmentation example in opencv.js tutorial * [Opencv.js doc] Add object detection example, supprot YOLOv2 * [Opencv.js doc] Support SSD model for object detection example * [Opencv.js doc] Add fast neural style transfer example with opencv.js * [Opencv.js doc] Add pose estimation example in opencv.js tutorial * Delete whitespace for code check * [Opencv.js doc] Add object detection example with camera * [Opencv.js doc] Add json files containing model information to each example * [Opencv.js doc] Add a js file for common function in dnn example * [Opencv.js doc] Create single function getBlobFromImage * [Opencv.js doc] Add url of model into webpage * [OpenCV.js doc] Update UI for running * [Opencv.js doc] Load dnn model by input button * [Opencv.js doc] Fix some UI issues * [Opencv.js doc] Change code format Co-authored-by: Ningxin Hu --- .../js_assets/js_dnn_example_helper.js | 119 ++++++ .../js_assets/js_image_classification.html | 263 ++++++++++++ .../js_image_classification_model_info.json | 65 +++ .../js_image_classification_with_camera.html | 281 ++++++++++++ .../js_assets/js_object_detection.html | 387 +++++++++++++++++ .../js_object_detection_model_info.json | 39 ++ .../js_object_detection_with_camera.html | 402 ++++++++++++++++++ .../js_assets/js_pose_estimation.html | 327 ++++++++++++++ .../js_pose_estimation_model_info.json | 34 ++ .../js_assets/js_semantic_segmentation.html | 243 +++++++++++ .../js_semantic_segmentation_model_info.json | 12 + .../js_assets/js_style_transfer.html | 228 ++++++++++ .../js_style_transfer_model_info.json | 76 ++++ 
doc/js_tutorials/js_assets/utils.js | 10 +- .../js_image_classification.markdown | 13 + ..._image_classification_with_camera.markdown | 15 + .../js_object_detection.markdown | 13 + .../js_object_detection_with_camera.markdown | 13 + .../js_pose_estimation.markdown | 13 + .../js_semantic_segmentation.markdown | 13 + .../js_style_transfer.markdown | 13 + .../js_dnn/js_table_of_contents_dnn.markdown | 30 ++ doc/js_tutorials/js_tutorials.markdown | 4 + 23 files changed, 2611 insertions(+), 2 deletions(-) create mode 100644 doc/js_tutorials/js_assets/js_dnn_example_helper.js create mode 100644 doc/js_tutorials/js_assets/js_image_classification.html create mode 100644 doc/js_tutorials/js_assets/js_image_classification_model_info.json create mode 100644 doc/js_tutorials/js_assets/js_image_classification_with_camera.html create mode 100644 doc/js_tutorials/js_assets/js_object_detection.html create mode 100644 doc/js_tutorials/js_assets/js_object_detection_model_info.json create mode 100644 doc/js_tutorials/js_assets/js_object_detection_with_camera.html create mode 100644 doc/js_tutorials/js_assets/js_pose_estimation.html create mode 100644 doc/js_tutorials/js_assets/js_pose_estimation_model_info.json create mode 100644 doc/js_tutorials/js_assets/js_semantic_segmentation.html create mode 100644 doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json create mode 100644 doc/js_tutorials/js_assets/js_style_transfer.html create mode 100644 doc/js_tutorials/js_assets/js_style_transfer_model_info.json create mode 100644 doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown create mode 100644 doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown create mode 100644 doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown create mode 100644 doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown create mode 100644 
doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown create mode 100644 doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown create mode 100644 doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown create mode 100644 doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown diff --git a/doc/js_tutorials/js_assets/js_dnn_example_helper.js b/doc/js_tutorials/js_assets/js_dnn_example_helper.js new file mode 100644 index 0000000000..06baa6760b --- /dev/null +++ b/doc/js_tutorials/js_assets/js_dnn_example_helper.js @@ -0,0 +1,119 @@ +getBlobFromImage = function(inputSize, mean, std, swapRB, image) { + let mat; + if (typeof(image) === 'string') { + mat = cv.imread(image); + } else { + mat = image; + } + + let matC3 = new cv.Mat(mat.matSize[0], mat.matSize[1], cv.CV_8UC3); + cv.cvtColor(mat, matC3, cv.COLOR_RGBA2BGR); + let input = cv.blobFromImage(matC3, std, new cv.Size(inputSize[0], inputSize[1]), + new cv.Scalar(mean[0], mean[1], mean[2]), swapRB); + + matC3.delete(); + return input; +} + +loadLables = async function(labelsUrl) { + let response = await fetch(labelsUrl); + let label = await response.text(); + label = label.split('\n'); + return label; +} + +loadModel = async function(e) { + return new Promise((resolve) => { + let file = e.target.files[0]; + let path = file.name; + let reader = new FileReader(); + reader.readAsArrayBuffer(file); + reader.onload = function(ev) { + if (reader.readyState === 2) { + let buffer = reader.result; + let data = new Uint8Array(buffer); + cv.FS_createDataFile('/', path, data, true, false, false); + resolve(path); + } + } + }); +} + +getTopClasses = function(probs, labels, topK = 3) { + probs = Array.from(probs); + let indexes = probs.map((prob, index) => [prob, index]); + let sorted = indexes.sort((a, b) => { + if (a[0] === b[0]) {return 0;} + return a[0] < b[0] ? 
-1 : 1; + }); + sorted.reverse(); + let classes = []; + for (let i = 0; i < topK; ++i) { + let prob = sorted[i][0]; + let index = sorted[i][1]; + let c = { + label: labels[index], + prob: (prob * 100).toFixed(2) + } + classes.push(c); + } + return classes; +} + +loadImageToCanvas = function(e, canvasId) { + let files = e.target.files; + let imgUrl = URL.createObjectURL(files[0]); + let canvas = document.getElementById(canvasId); + let ctx = canvas.getContext('2d'); + let img = new Image(); + img.crossOrigin = 'anonymous'; + img.src = imgUrl; + img.onload = function() { + ctx.drawImage(img, 0, 0, canvas.width, canvas.height); + }; +} + +drawInfoTable = async function(jsonUrl, divId) { + let response = await fetch(jsonUrl); + let json = await response.json(); + + let appendix = document.getElementById(divId); + for (key of Object.keys(json)) { + let h3 = document.createElement('h3'); + h3.textContent = key + " model"; + appendix.appendChild(h3); + + let table = document.createElement('table'); + let head_tr = document.createElement('tr'); + for (head of Object.keys(json[key][0])) { + let th = document.createElement('th'); + th.textContent = head; + th.style.border = "1px solid black"; + head_tr.appendChild(th); + } + table.appendChild(head_tr) + + for (model of json[key]) { + let tr = document.createElement('tr'); + for (params of Object.keys(model)) { + let td = document.createElement('td'); + td.style.border = "1px solid black"; + if (params !== "modelUrl" && params !== "configUrl" && params !== "labelsUrl") { + td.textContent = model[params]; + tr.appendChild(td); + } else { + let a = document.createElement('a'); + let link = document.createTextNode('link'); + a.append(link); + a.href = model[params]; + td.appendChild(a); + tr.appendChild(td); + } + } + table.appendChild(tr); + } + table.style.width = "800px"; + table.style.borderCollapse = "collapse"; + appendix.appendChild(table); + } +} diff --git a/doc/js_tutorials/js_assets/js_image_classification.html 
b/doc/js_tutorials/js_assets/js_image_classification.html new file mode 100644 index 0000000000..656f2720b6 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification.html @@ -0,0 +1,263 @@ + + + + + + Image Classification Example + + + + +

Image Classification Example

+

+ This tutorial shows you how to write an image classification example with OpenCV.js.
+ To try the example you should click the modelFile button(and configFile button if needed) to upload inference model. + You can find the model URLs and parameters in the model info section. + Then You should change the parameters in the first code snippet according to the uploaded model. + Finally click Try it button to see the result. You can choose any other images.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+
+
+ canvasInput +
+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.Main loop in which will read the image from canvas and do inference once.

+ +

3.Load labels from txt file and process it into an array.

+ +

4.Get blob from image as input for net, and standardize it with mean and std.

+ +

5.Fetch model file and save to emscripten file system once click the input button.

+ +

6.The post-processing, including softmax if needed and get the top classes from the output vector.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_image_classification_model_info.json b/doc/js_tutorials/js_assets/js_image_classification_model_info.json new file mode 100644 index 0000000000..67553ec2d3 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification_model_info.json @@ -0,0 +1,65 @@ +{ + "caffe": [ + { + "model": "alexnet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel", + "configUrl": "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt" + }, + { + "model": "densenet", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "needSoftmax": "true", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "https://drive.google.com/open?id=0B7ubpZO7HnlCcHlfNmJkU2VPelE", + "configUrl": "https://raw.githubusercontent.com/shicai/DenseNet-Caffe/master/DenseNet_121.prototxt" + }, + { + "model": "googlenet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", + "configUrl": "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt" + }, + { + "model": "squeezenet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": 
"https://raw.githubusercontent.com/forresti/SqueezeNet/master/SqueezeNet_v1.0/squeezenet_v1.0.caffemodel", + "configUrl": "https://raw.githubusercontent.com/forresti/SqueezeNet/master/SqueezeNet_v1.0/deploy.prototxt" + }, + { + "model": "VGG", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel", + "configUrl": "https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/f02f8769e64494bcd3d7e97d5d747ac275825721/VGG_ILSVRC_19_layers_deploy.prototxt" + } + ], + "tensorflow": [ + { + "model": "inception", + "mean": "123, 117, 104", + "std": "1", + "swapRB": "true", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/petewarden/tf_ios_makefile_example/master/data/imagenet_comp_graph_label_strings.txt", + "modelUrl": "https://raw.githubusercontent.com/petewarden/tf_ios_makefile_example/master/data/tensorflow_inception_graph.pb" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_image_classification_with_camera.html b/doc/js_tutorials/js_assets/js_image_classification_with_camera.html new file mode 100644 index 0000000000..9a2473cf2b --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification_with_camera.html @@ -0,0 +1,281 @@ + + + + + + Image Classification Example with Camera + + + + +

Image Classification Example with Camera

+

+ This tutorial shows you how to write an image classification example with camera.
+ To try the example you should click the modelFile button(and configFile button if needed) to upload inference model. + You can find the model URLs and parameters in the model info section. + Then You should change the parameters in the first code snippet according to the uploaded model. + Finally click Start/Stop button to start or stop the camera capture.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+
+
+ videoInput +
+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.The function to capture video from camera, and the main loop in which will do inference once.

+ +

3.Load labels from txt file and process it into an array.

+ +

4.Get blob from image as input for net, and standardize it with mean and std.

+ +

5.Fetch model file and save to emscripten file system once click the input button.

+ +

6.The post-processing, including softmax if needed and get the top classes from the output vector.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_object_detection.html b/doc/js_tutorials/js_assets/js_object_detection.html new file mode 100644 index 0000000000..53f1e48639 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection.html @@ -0,0 +1,387 @@ + + + + + + Object Detection Example + + + + +

Object Detection Example

+

+ This tutorial shows you how to write an object detection example with OpenCV.js.
+ To try the example you should click the modelFile button(and configFile button if needed) to upload inference model. + You can find the model URLs and parameters in the model info section. + Then You should change the parameters in the first code snippet according to the uploaded model. + Finally click Try it button to see the result. You can choose any other images.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + +
+
+ canvasInput +
+
+

+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.Main loop in which will read the image from canvas and do inference once.

+ +

3.Load labels from txt file and process it into an array.

+ +

4.Get blob from image as input for net, and standardize it with mean and std.

+ +

5.Fetch model file and save to emscripten file system once click the input button.

+ +

6.The post-processing, including get boxes from output and draw boxes into the image.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_object_detection_model_info.json b/doc/js_tutorials/js_assets/js_object_detection_model_info.json new file mode 100644 index 0000000000..c0d14be714 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection_model_info.json @@ -0,0 +1,39 @@ +{ + "caffe": [ + { + "model": "mobilenet_SSD", + "inputSize": "300, 300", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "outType": "SSD", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt", + "modelUrl": "https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/mobilenet_iter_73000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/deploy.prototxt" + }, + { + "model": "VGG_SSD", + "inputSize": "300, 300", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "outType": "SSD", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt", + "modelUrl": "https://drive.google.com/uc?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA&export=download", + "configUrl": "https://drive.google.com/uc?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA&export=download" + } + ], + "darknet": [ + { + "model": "yolov2_tiny", + "inputSize": "416, 416", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "outType": "YOLO", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_yolov3.txt", + "modelUrl": "https://pjreddie.com/media/files/yolov2-tiny.weights", + "configUrl": "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-tiny.cfg" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_object_detection_with_camera.html b/doc/js_tutorials/js_assets/js_object_detection_with_camera.html new file mode 100644 index 
0000000000..41bb609708 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection_with_camera.html @@ -0,0 +1,402 @@ + + + + + + Object Detection Example with Camera + + + + +

Object Detection Example with Camera

+

+ This tutorial shows you how to write an object detection example with camera.
+ To try the example you should click the modelFile button(and configInput button if needed) to upload inference model. + You can find the model URLs and parameters in the model info section. + Then You should change the parameters in the first code snippet according to the uploaded model. + Finally click Start/Stop button to start or stop the camera capture.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + +
+
+ videoInput +
+
+

+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.The function to capture video from camera, and the main loop in which will do inference once.

+ +

3.Load labels from txt file and process it into an array.

+ +

4.Get blob from image as input for net, and standardize it with mean and std.

+ +

5.Fetch model file and save to emscripten file system once click the input button.

+ +

6.The post-processing, including get boxes from output and draw boxes into the image.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_pose_estimation.html b/doc/js_tutorials/js_assets/js_pose_estimation.html new file mode 100644 index 0000000000..19c64663d1 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_pose_estimation.html @@ -0,0 +1,327 @@ + + + + + + Pose Estimation Example + + + + +

Pose Estimation Example

+

+ This tutorial shows you how to write a pose estimation example with OpenCV.js.
+ To try the example you should click the modelFile button (and the configInput button if needed) to upload the inference model. + You can find the model URLs and parameters in the model info section. + Then you should change the parameters in the first code snippet according to the uploaded model. + Finally click the Try it button to see the result. You can choose any other images.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + +
+
+ canvasInput +
+
+

+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.Main loop in which will read the image from canvas and do inference once.

+ +

3.Get blob from image as input for net, and standardize it with mean and std.

+ +

4.Fetch model file and save to emscripten file system once click the input button.

+ +

5.The pairs of keypoints of different dataset.

+ +

6.The post-processing, including get the predicted points and draw lines into the image.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json b/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json new file mode 100644 index 0000000000..922c813f39 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json @@ -0,0 +1,34 @@ +{ + "caffe": [ + { + "model": "body_25", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "BODY_25", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/body_25/pose_iter_584000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/body_25/pose_deploy.prototxt" + }, + { + "model": "coco", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "COCO", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/coco/pose_iter_440000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/coco/pose_deploy_linevec.prototxt" + }, + { + "model": "mpi", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "MPI", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/mpi/pose_iter_160000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/mpi/pose_deploy_linevec.prototxt" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_semantic_segmentation.html b/doc/js_tutorials/js_assets/js_semantic_segmentation.html new file mode 100644 index 0000000000..6fc27dbd19 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_semantic_segmentation.html @@ -0,0 +1,243 @@ + + + + + + Semantic Segmentation Example + + + + +

Semantic Segmentation Example

+

+ This tutorial shows you how to write a semantic segmentation example with OpenCV.js.
+ To try the example you should click the modelFile button (and the configInput button if needed) to upload the inference model. + You can find the model URLs and parameters in the model info section. + Then you should change the parameters in the first code snippet according to the uploaded model. + Finally click the Try it button to see the result. You can choose any other images.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + +
+
+ canvasInput +
+
+

+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.Main loop in which will read the image from canvas and do inference once.

+ +

3.Get blob from image as input for net, and standardize it with mean and std.

+ +

4.Fetch model file and save to emscripten file system once click the input button.

+ +

5.The post-processing, including generating colors for different classes and argmax to get the classes for each pixel.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json b/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json new file mode 100644 index 0000000000..ef0016af1d --- /dev/null +++ b/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json @@ -0,0 +1,12 @@ +{ + "tensorflow": [ + { + "model": "deeplabv3", + "inputSize": "513, 513", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "modelUrl": "https://drive.google.com/uc?id=1v-hfGenaE9tiGOzo5qdgMNG_gqQ5-Xn4&export=download" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_style_transfer.html b/doc/js_tutorials/js_assets/js_style_transfer.html new file mode 100644 index 0000000000..91422e1344 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_style_transfer.html @@ -0,0 +1,228 @@ + + + + + + Style Transfer Example + + + + +

Style Transfer Example

+

+ This tutorial shows you how to write a style transfer example with OpenCV.js.
+ To try the example you should click the modelFile button (and the configFile button if needed) to upload the inference model. + You can find the model URLs and parameters in the model info section. + Then you should change the parameters in the first code snippet according to the uploaded model. + Finally click the Try it button to see the result. You can choose any other images.
+

+ +
+
+ + + + + + + + + + + + + + + +
+ + + +
+
+ canvasInput +
+
+

+
+
+ modelFile +
+
+
+ configFile +
+
+
+ +
+

+
+ +
+

Help function

+

1.The parameters for model inference which you can modify to investigate more models.

+ +

2.Main loop in which will read the image from canvas and do inference once.

+ +

3.Get blob from image as input for net, and standardize it with mean and std.

+ +

4.Fetch model file and save to emscripten file system once click the input button.

+ +

5.The post-processing, including scaling and reordering.

+ +
+ +
+

Model Info:

+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_style_transfer_model_info.json b/doc/js_tutorials/js_assets/js_style_transfer_model_info.json new file mode 100644 index 0000000000..9cc66018a0 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_style_transfer_model_info.json @@ -0,0 +1,76 @@ +{ + "torch": [ + { + "model": "candy.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/candy.t7" + }, + { + "model": "composition_vii.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/composition_vii.t7" + }, + { + "model": "feathers.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/feathers.t7" + }, + { + "model": "la_muse.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/la_muse.t7" + }, + { + "model": "mosaic.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/mosaic.t7" + }, + { + "model": "starry_night.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/starry_night.t7" + }, + { + "model": "the_scream.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/the_scream.t7" + }, + { + "model": 
"the_wave.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/the_wave.t7" + }, + { + "model": "udnie.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/udnie.t7" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/utils.js b/doc/js_tutorials/js_assets/utils.js index 4d5deb0b51..65f6d1782d 100644 --- a/doc/js_tutorials/js_assets/utils.js +++ b/doc/js_tutorials/js_assets/utils.js @@ -7,7 +7,7 @@ function Utils(errorOutputId) { // eslint-disable-line no-unused-vars let script = document.createElement('script'); script.setAttribute('async', ''); script.setAttribute('type', 'text/javascript'); - script.addEventListener('load', () => { + script.addEventListener('load', async () => { if (cv.getBuildInformation) { console.log(cv.getBuildInformation()); @@ -16,9 +16,15 @@ function Utils(errorOutputId) { // eslint-disable-line no-unused-vars else { // WASM - cv['onRuntimeInitialized']=()=>{ + if (cv instanceof Promise) { + cv = await cv; console.log(cv.getBuildInformation()); onloadCallback(); + } else { + cv['onRuntimeInitialized']=()=>{ + console.log(cv.getBuildInformation()); + onloadCallback(); + } } } }); diff --git a/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown new file mode 100644 index 0000000000..1a94f8d14a --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown @@ -0,0 +1,13 @@ +Image Classification Example {#tutorial_js_image_classification} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for image classification. 
+ +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown new file mode 100644 index 0000000000..bdf11161fc --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown @@ -0,0 +1,15 @@ +Image Classification Example with Camera {#tutorial_js_image_classification_with_camera} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for image classification example with camera. + +@note If you don't know how to capture video from camera, please review @ref tutorial_js_video_display. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown new file mode 100644 index 0000000000..980b45c236 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown @@ -0,0 +1,13 @@ +Object Detection Example {#tutorial_js_object_detection} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for object detection. 
+ +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown new file mode 100644 index 0000000000..e6e8f6f957 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown @@ -0,0 +1,13 @@ +Object Detection Example with Camera{#tutorial_js_object_detection_with_camera} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for object detection with camera. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown b/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown new file mode 100644 index 0000000000..b090ff2cfb --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown @@ -0,0 +1,13 @@ +Pose Estimation Example {#tutorial_js_pose_estimation} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for pose estimation. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown b/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown new file mode 100644 index 0000000000..50177fb549 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown @@ -0,0 +1,13 @@ +Semantic Segmentation Example {#tutorial_js_semantic_segmentation} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for semantic segmentation. 
+ +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown b/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown new file mode 100644 index 0000000000..7c1799ac6a --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown @@ -0,0 +1,13 @@ +Style Transfer Example {#tutorial_js_style_transfer} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for style transfer. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown b/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown new file mode 100644 index 0000000000..e008dc81d1 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown @@ -0,0 +1,30 @@ +Deep Neural Networks (dnn module) {#tutorial_js_table_of_contents_dnn} +============ + +- @subpage tutorial_js_image_classification + + Image classification example + +- @subpage tutorial_js_image_classification_with_camera + + Image classification example with camera + +- @subpage tutorial_js_object_detection + + Object detection example + +- @subpage tutorial_js_object_detection_with_camera + + Object detection example with camera + +- @subpage tutorial_js_semantic_segmentation + + Semantic segmentation example + +- @subpage tutorial_js_style_transfer + + Style transfer example + +- @subpage tutorial_js_pose_estimation + + Pose estimation example diff --git a/doc/js_tutorials/js_tutorials.markdown b/doc/js_tutorials/js_tutorials.markdown index c8a8f92a31..73e69daa98 100644 --- a/doc/js_tutorials/js_tutorials.markdown +++ b/doc/js_tutorials/js_tutorials.markdown @@ -26,3 +26,7 @@ OpenCV.js Tutorials {#tutorial_js_root} In this section you will object detection techniques like face detection etc. 
+ +- @subpage tutorial_js_table_of_contents_dnn + + These tutorials show how to use dnn module in JavaScript From 95ce8f45ea1c4795ddb131c0c15760fb3b2571a1 Mon Sep 17 00:00:00 2001 From: Anton Potapov Date: Mon, 30 Nov 2020 16:15:13 +0300 Subject: [PATCH 007/422] Merge pull request #17851 from anton-potapov:sole_tbb_executor * TBB executor for GAPI - the sole executor - unit tests for it - no usage in the GAPI at the momnet * TBB executor for GAPI - introduced new overload of execute to explicitly accept tbb::arena argument - added more basic tests - moved arena creation code into tests - * TBB executor for GAPI - fixed compie errors & warnings * TBB executor for GAPI - split all-in-one execute() function into logicaly independant parts * TBB executor for GAPI - used util::variant in in the tile_node * TBB executor for GAPI - moved copy_through_move to separate header - rearranged details staff in proper namespaces - moved all implementation into detail namespace * TBB executor for GAPI - fixed build error with TBB 4.4. 
- fixed build warnings * TBB executor for GAPI - aligned strings width - fixed spaces in expressions - fixed english grammar - minor improvements * TBB executor for GAPI - added more comments - minor improvements * TBB executor for GAPI - changed ITT_ prefix for macroses to GAPI_ITT * TBB executor for GAPI - no more "unused" warning for GAPI_DbgAssert - changed local assert macro to man onto GAPI_DbgAssert * TBB executor for GAPI - file renamings - changed local assert macro to man onto GAPI_DbgAsse * TBB executor for GAPI - test file renamed - add more comments * TBB executor for GAPI - minor clenups and cosmetic changes * TBB executor for GAPI - minor clenups and cosmetic changes * TBB executor for GAPI - changed spaces and curly braces alignment * TBB executor for GAPI - minor cleanups * TBB executor for GAPI - minor cleanups --- modules/gapi/CMakeLists.txt | 5 + .../gapi/include/opencv2/gapi/own/assert.hpp | 18 +- .../opencv2/gapi/util/copy_through_move.hpp | 34 ++ modules/gapi/src/executor/gapi_itt.hpp | 59 +++ modules/gapi/src/executor/gtbbexecutor.cpp | 445 ++++++++++++++++++ modules/gapi/src/executor/gtbbexecutor.hpp | 103 ++++ .../executor/gtbbexecutor_internal_tests.cpp | 172 +++++++ 7 files changed, 833 insertions(+), 3 deletions(-) create mode 100644 modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp create mode 100644 modules/gapi/src/executor/gapi_itt.hpp create mode 100644 modules/gapi/src/executor/gtbbexecutor.cpp create mode 100644 modules/gapi/src/executor/gtbbexecutor.hpp create mode 100644 modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 0067cfa389..c1f58ee22b 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -107,6 +107,7 @@ set(gapi_srcs # Executor src/executor/gexecutor.cpp + src/executor/gtbbexecutor.cpp src/executor/gstreamingexecutor.cpp src/executor/gasync.cpp @@ -196,6 +197,10 @@ if(TARGET opencv_test_gapi) 
target_link_libraries(opencv_test_gapi PRIVATE ade) endif() +if(HAVE_TBB AND TARGET opencv_test_gapi) + ocv_target_link_libraries(opencv_test_gapi PRIVATE tbb) +endif() + if(HAVE_FREETYPE) ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_FREETYPE) if(TARGET opencv_test_gapi) diff --git a/modules/gapi/include/opencv2/gapi/own/assert.hpp b/modules/gapi/include/opencv2/gapi/own/assert.hpp index d0e0f1c3ff..d50543fdac 100644 --- a/modules/gapi/include/opencv2/gapi/own/assert.hpp +++ b/modules/gapi/include/opencv2/gapi/own/assert.hpp @@ -2,16 +2,28 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2020 Intel Corporation #ifndef OPENCV_GAPI_OWN_ASSERT_HPP #define OPENCV_GAPI_OWN_ASSERT_HPP +#include + +#define GAPI_DbgAssertNoOp(expr) { \ + constexpr bool _assert_tmp = false && (expr); \ + cv::util::suppress_unused_warning(_assert_tmp); \ +} + #if !defined(GAPI_STANDALONE) #include #define GAPI_Assert CV_Assert -#define GAPI_DbgAssert CV_DbgAssert + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS +# define GAPI_DbgAssert CV_DbgAssert +#else +# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) +#endif #else #include @@ -33,7 +45,7 @@ namespace detail #ifdef NDEBUG -# define GAPI_DbgAssert(expr) +# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) #else # define GAPI_DbgAssert(expr) GAPI_Assert(expr) #endif diff --git a/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp b/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp new file mode 100644 index 0000000000..1a1121eb21 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp @@ -0,0 +1,34 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP +#define OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP + +#include //decay_t + +namespace cv +{ +namespace util +{ + //This is a tool to move initialize captures of a lambda in C++11 + template + struct copy_through_move_t{ + T value; + const T& get() const {return value;} + T& get() {return value;} + copy_through_move_t(T&& g) : value(std::move(g)) {} + copy_through_move_t(copy_through_move_t&&) = default; + copy_through_move_t(copy_through_move_t const& lhs) : copy_through_move_t(std::move(const_cast(lhs))) {} + }; + + template + copy_through_move_t> copy_through_move(T&& t){ + return std::forward(t); + } +} // namespace util +} // namespace cv + +#endif /* OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP */ diff --git a/modules/gapi/src/executor/gapi_itt.hpp b/modules/gapi/src/executor/gapi_itt.hpp new file mode 100644 index 0000000000..2ab3237e7f --- /dev/null +++ b/modules/gapi/src/executor/gapi_itt.hpp @@ -0,0 +1,59 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_GAPI_ITT_HPP +#define OPENCV_GAPI_GAPI_ITT_HPP + +//for ITT_NAMED_TRACE_GUARD +#include +#include + +// FIXME: It seems that this macro is not propagated here by the OpenCV cmake (as this is not core module). +// (Consider using OpenCV's trace.hpp ) +#ifdef OPENCV_WITH_ITT +#include +#endif + +#include +namespace cv { +namespace util { + template< class T > + using remove_reference_t = typename std::remove_reference::type; + + // Home brew ScopeGuard + // D will be called automatically with p as argument when ScopeGuard goes out of scope. 
+ // call release() on the ScopeGuard object to revoke guard action + template + auto make_ptr_guard(T* p, D&& d) -> std::unique_ptr> { + return {p, std::forward(d)}; + } +} // namespace util + +// FIXME: make it more reusable (and move to other place and other namespace) +namespace gimpl { namespace parallel { + #ifdef OPENCV_WITH_ITT + extern const __itt_domain* gapi_itt_domain; + + namespace { + auto make_itt_guard = [](__itt_string_handle* h) { + __itt_task_begin(gapi_itt_domain, __itt_null, __itt_null, (h)); + return util::make_ptr_guard(reinterpret_cast(1), [](int* ) { __itt_task_end(gapi_itt_domain); }); + }; + } // namespace + + #define GAPI_ITT_NAMED_TRACE_GUARD(name, h) auto name = cv::gimpl::parallel::make_itt_guard(h); cv::util::suppress_unused_warning(name) + #else + struct dumb_guard {void reset(){}}; + #define GAPI_ITT_NAMED_TRACE_GUARD(name, h) cv::gimpl::parallel::dumb_guard name; cv::util::suppress_unused_warning(name) + #endif + + #define GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) GAPI_ITT_NAMED_TRACE_GUARD(itt_trace_guard_##LINE, h) + #define GAPI_ITT_AUTO_TRACE_GUARD_IMPL(LINE, h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL_(LINE, h) + #define GAPI_ITT_AUTO_TRACE_GUARD(h) GAPI_ITT_AUTO_TRACE_GUARD_IMPL(__LINE__, h) +}} //gimpl::parallel +} //namespace cv + +#endif /* OPENCV_GAPI_GAPI_ITT_HPP */ diff --git a/modules/gapi/src/executor/gtbbexecutor.cpp b/modules/gapi/src/executor/gtbbexecutor.cpp new file mode 100644 index 0000000000..03c6757dc6 --- /dev/null +++ b/modules/gapi/src/executor/gtbbexecutor.cpp @@ -0,0 +1,445 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2020 Intel Corporation + +#include "gtbbexecutor.hpp" + +#if defined(HAVE_TBB) +#include "gapi_itt.hpp" + +#include +#include +#include "logger.hpp" // GAPI_LOG + +#include +#include // unique_ptr + +#include +#include + +#include + +#define ASSERT(expr) GAPI_DbgAssert(expr) + +#define LOG_INFO(tag, ...) GAPI_LOG_INFO(tag, __VA_ARGS__) +#define LOG_WARNING(tag, ...) GAPI_LOG_WARNING(tag, __VA_ARGS__) +#define LOG_DEBUG(tag, ...) GAPI_LOG_DEBUG(tag, __VA_ARGS__) + + +#ifdef OPENCV_WITH_ITT +const __itt_domain* cv::gimpl::parallel::gapi_itt_domain = __itt_domain_create("GAPI Context"); +#endif + +namespace cv { namespace gimpl { namespace parallel { + +namespace detail { +// some helper staff to deal with tbb::task related entities +namespace tasking { + +enum class use_tbb_scheduler_bypass { + NO, + YES +}; + +inline void assert_graph_is_running(tbb::task* root) { + // tbb::task::wait_for_all block calling thread until task ref_count is dropped to 1 + // So if the root task ref_count is greater than 1 graph still has a job to do and + // according wait_for_all() has not yet returned + ASSERT(root->ref_count() > 1); +} + +// made template to break circular dependencies +template +struct functor_task : tbb::task { + body_t body; + + template + functor_task(arg_t&& a) : body(std::forward(a)) {} + + tbb::task * execute() override { + assert_graph_is_running(parent()); + + auto reuse_current_task = body(); + // if needed, say TBB to execute current task once again + return (use_tbb_scheduler_bypass::YES == reuse_current_task) ? 
(recycle_as_continuation(), this) : nullptr; + } + ~functor_task() { + assert_graph_is_running(parent()); + } +}; + +template +auto allocate_task(tbb::task* root, body_t const& body) -> functor_task* { + return new(tbb::task::allocate_additional_child_of(*root)) functor_task{body}; +} + +template +void spawn_no_assert(tbb::task* root, body_t const& body) { + tbb::task::spawn(* allocate_task(root, body)); +} + +#ifdef OPENCV_WITH_ITT +namespace { + static __itt_string_handle* ittTbbAddReadyBlocksToQueue = __itt_string_handle_create("add ready blocks to queue"); + static __itt_string_handle* ittTbbSpawnReadyBlocks = __itt_string_handle_create("spawn ready blocks"); + static __itt_string_handle* ittTbbEnqueueSpawnReadyBlocks = __itt_string_handle_create("enqueueing a spawn of ready blocks"); + static __itt_string_handle* ittTbbUnlockMasterThread = __itt_string_handle_create("Unlocking master thread"); +} +#endif // OPENCV_WITH_ITT + + +template +void batch_spawn(size_t count, tbb::task* root, body_t const& body, bool do_assert_graph_is_running = true) { + GAPI_ITT_AUTO_TRACE_GUARD(ittTbbSpawnReadyBlocks); + if (do_assert_graph_is_running) { + assert_graph_is_running(root); + } + + for (size_t i=0; i; + +root_t inline create_root(tbb::task_group_context& ctx) { + root_t root{new (tbb::task::allocate_root(ctx)) tbb::empty_task}; + root->set_ref_count(1); // required by wait_for_all, as it waits until counter drops to 1 + return root; +} + +std::size_t inline tg_context_traits() { + // Specify tbb::task_group_context::concurrent_wait in the traits to ask TBB scheduler not to change + // ref_count of the task we wait on (root) when wait is complete. 
+ return tbb::task_group_context::default_traits | tbb::task_group_context::concurrent_wait; +} + +} // namespace tasking + +namespace async { +struct async_tasks_t { + std::atomic count {0}; + std::condition_variable cv; + std::mutex mtx; +}; + +enum class wake_tbb_master { + NO, + YES +}; + +void inline wake_master(async_tasks_t& async_tasks, wake_tbb_master wake_master) { + // TODO: seems that this can be relaxed + auto active_async_tasks = --async_tasks.count; + + if ((active_async_tasks == 0) || (wake_master == wake_tbb_master::YES)) { + // Was the last async task or asked to wake TBB master up(e.g. there are new TBB tasks to execute) + GAPI_ITT_AUTO_TRACE_GUARD(ittTbbUnlockMasterThread); + // While decrement of async_tasks_t::count is atomic, it might occur after the waiting + // thread has read its value but _before_ it actually starts waiting on the condition variable. + // So, lock acquire is needed to guarantee that current condition check (if any) in the waiting thread + // (possibly ran in parallel to async_tasks_t::count decrement above) is completed _before_ signal is issued. + // Therefore when notify_one is called, waiting thread is either sleeping on the condition variable or + // running a new check which is guaranteed to pick the new value and return from wait(). + + // There is no need to _hold_ the lock while signaling, only to acquire it. + std::unique_lock {async_tasks.mtx}; // Acquire and release the lock. 
+ async_tasks.cv.notify_one(); + } +} + +struct master_thread_sleep_lock_t +{ + struct sleep_unlock { + void operator()(async_tasks_t* t) const { + ASSERT(t); + wake_master(*t, wake_tbb_master::NO); + } + }; + + std::unique_ptr guard; + + master_thread_sleep_lock_t() = default; + master_thread_sleep_lock_t(async_tasks_t* async_tasks_ptr) : guard(async_tasks_ptr) { + // TODO: seems that this can be relaxed + ++(guard->count); + } + + void unlock(wake_tbb_master wake) { + if (auto* p = guard.release()) { + wake_master(*p, wake); + } + } +}; + +master_thread_sleep_lock_t inline lock_sleep_master(async_tasks_t& async_tasks) { + return {&async_tasks}; +} + +enum class is_tbb_work_present { + NO, + YES +}; + +//RAII object to block TBB master thread (one that does wait_for_all()) +//N.B. :wait_for_all() return control when root ref_count drops to 1, +struct root_wait_lock_t { + struct root_decrement_ref_count{ + void operator()(tbb::task* t) const { + ASSERT(t); + auto result = t->decrement_ref_count(); + ASSERT(result >= 1); + } + }; + + std::unique_ptr guard; + + root_wait_lock_t() = default; + root_wait_lock_t(tasking::root_t& root, is_tbb_work_present& previous_state) : guard{root.get()} { + // Block the master thread while the *this object is alive. + auto new_root_ref_count = root->add_ref_count(1); + previous_state = (new_root_ref_count == 2) ? 
is_tbb_work_present::NO : is_tbb_work_present::YES; + } + +}; + +root_wait_lock_t inline lock_wait_master(tasking::root_t& root, is_tbb_work_present& previous_state) { + return root_wait_lock_t{root, previous_state}; +} + +} // namespace async + +inline tile_node* pop(prio_items_queue_t& q) { + tile_node* node = nullptr; + bool popped = q.try_pop(node); + ASSERT(popped && "queue should be non empty as we push items to it before we spawn"); + return node; +} + +namespace graph { + // Returns : number of items actually pushed into the q + std::size_t inline push_ready_dependants(prio_items_queue_t& q, tile_node* node) { + GAPI_ITT_AUTO_TRACE_GUARD(ittTbbAddReadyBlocksToQueue); + std::size_t ready_items = 0; + // enable dependent tasks + for (auto* dependant : node->dependants) { + // fetch_and_sub returns previous value + if (1 == dependant->dependency_count.fetch_sub(1)) { + // tile node is ready for execution, add it to the queue + q.push(dependant); + ++ready_items; + } + } + return ready_items; + } + + struct exec_ctx { + tbb::task_arena& arena; + prio_items_queue_t& q; + tbb::task_group_context tg_ctx; + tasking::root_t root; + detail::async::async_tasks_t async_tasks; + std::atomic executed {0}; + + exec_ctx(tbb::task_arena& arena_, prio_items_queue_t& q_) + : arena(arena_), q(q_), + // As the traits is last argument, explicitly specify (default) value for first argument + tg_ctx{tbb::task_group_context::bound, tasking::tg_context_traits()}, + root(tasking::create_root(tg_ctx)) + {} + }; + + // At the moment there are no suitable tools to manage TBB priorities on task by task basis. + // Instead priority queue is used to respect tile_node priorities. + // As well, TBB task is not bound to any particular tile_node until actually executed. + + // Strictly speaking there are two graphs here: + // - G-API one, described by the connected tile_node instances. + // This graph is : + // - Known beforehand, and do not change during the execution (i.e. 
static)
+    //      - Contains both TBB and non-TBB parts
+    //      - prioritized, (i.e. all nodes have an assigned priority of execution)
+    //
+    //  - TBB task tree, which is :
+    //      - flat (Has only two levels : root and leaves)
+    //      - dynamic, i.e. new leaves are added on demand when new tbb tasks are spawned
+    //      - describes only TBB/CPU part of the whole graph
+    //      - non-prioritized (i.e. all tasks are created equal)
+
+    // Class below represents TBB task payload.
+    //
+    // Each instance basically does the three things :
+    // 1. Gets the tile_node item from the top of the queue
+    // 2. Executes its body
+    // 3. Pushes dependent tile_nodes to the queue once they are ready
+    //
+    struct task_body {
+        exec_ctx& ctx;
+
+        std::size_t push_ready_dependants(tile_node* node) const {
+            return graph::push_ready_dependants(ctx.q, node);
+        }
+
+        void spawn_clones(std::size_t items) const {
+            tasking::batch_spawn(items, ctx.root.get(), *this);
+        }
+
+        task_body(exec_ctx& ctx_) : ctx(ctx_) {}
+        tasking::use_tbb_scheduler_bypass operator()() const {
+            ASSERT(!ctx.q.empty() && "Spawned task with no job to do ?
"); + + tile_node* node = detail::pop(ctx.q); + + auto result = tasking::use_tbb_scheduler_bypass::NO; + // execute the task + + if (auto p = util::get_if(&(node->task_body))) { + // synchronous task + p->body(); + + std::size_t ready_items = push_ready_dependants(node); + + if (ready_items > 0) { + // spawn one less tasks and say TBB to reuse(recycle) current task + spawn_clones(ready_items - 1); + result = tasking::use_tbb_scheduler_bypass::YES; + } + } + else { + LOG_DEBUG(NULL, "Async task"); + using namespace detail::async; + using util::copy_through_move; + + auto block_master = copy_through_move(lock_sleep_master(ctx.async_tasks)); + + auto self_copy = *this; + auto callback = [node, block_master, self_copy] () mutable /*due to block_master.get().unlock()*/ { + LOG_DEBUG(NULL, "Async task callback is called"); + // Implicitly unlock master right in the end of callback + auto master_sleep_lock = std::move(block_master); + std::size_t ready_items = self_copy.push_ready_dependants(node); + if (ready_items > 0) { + auto master_was_active = is_tbb_work_present::NO; + { + GAPI_ITT_AUTO_TRACE_GUARD(ittTbbEnqueueSpawnReadyBlocks); + // Force master thread (one that does wait_for_all()) to (actively) wait for enqueued tasks + // and unlock it right after all dependent tasks are spawned. + + auto root_wait_lock = copy_through_move(lock_wait_master(self_copy.ctx.root, master_was_active)); + + // TODO: add test to cover proper holding of root_wait_lock + // As the calling thread most likely is not TBB one, instead of spawning TBB tasks directly we + // enqueue a task which will spawn them. + // For master thread to not leave wait_for_all() prematurely, + // hold the root_wait_lock until need tasks are actually spawned. + self_copy.ctx.arena.enqueue([ready_items, self_copy, root_wait_lock]() { + self_copy.spawn_clones(ready_items); + // TODO: why we need this? 
Either write a descriptive comment or remove it + volatile auto unused = root_wait_lock.get().guard.get(); + util::suppress_unused_warning(unused); + }); + } + // Wake master thread (if any) to pick up the enqueued tasks iff: + // 1. there is new TBB work to do, and + // 2. Master thread was sleeping on condition variable waiting for async tasks to complete + // (There was no active work before (i.e. root->ref_count() was == 1)) + auto wake_master = (master_was_active == is_tbb_work_present::NO) ? + wake_tbb_master::YES : wake_tbb_master::NO; + master_sleep_lock.get().unlock(wake_master); + } + }; + + auto& body = util::get(node->task_body).body; + body(std::move(callback), node->total_order_index); + } + + ctx.executed++; + // reset dependecy_count to initial state to simplify re-execution of the same graph + node->dependency_count = node->dependencies; + + return result; + } + }; +} +} // namespace detail +}}} // namespace cv::gimpl::parallel + +void cv::gimpl::parallel::execute(prio_items_queue_t& q) { + // get the reference to current task_arena (i.e. one we are running in) +#if TBB_INTERFACE_VERSION > 9002 + using attach_t = tbb::task_arena::attach; +#else + using attach_t = tbb::internal::attach; +#endif + + tbb::task_arena arena{attach_t{}}; + execute(q, arena); +} + +void cv::gimpl::parallel::execute(prio_items_queue_t& q, tbb::task_arena& arena) { + using namespace detail; + graph::exec_ctx ctx{arena, q}; + + arena.execute( + [&]() { + // Passed in queue is assumed to contain starting tasks, i.e. 
ones with no (or resolved) dependencies + auto num_start_tasks = q.size(); + + // TODO: use recursive spawning and task soft affinity for faster task distribution + // As graph is starting and no task has been spawned yet + // assert_graph_is_running(root) will not hold, so spawn without assert + tasking::batch_spawn(num_start_tasks, ctx.root.get(), graph::task_body{ctx}, /* assert_graph_is_running*/false); + + using namespace std::chrono; + high_resolution_clock timer; + + auto tbb_work_done = [&ctx]() { return 1 == ctx.root->ref_count(); }; + auto async_work_done = [&ctx]() { return 0 == ctx.async_tasks.count; }; + do { + // First participate in execution of TBB graph till there are no more ready tasks. + ctx.root->wait_for_all(); + + if (!async_work_done()) { // Wait on the conditional variable iff there is active async work + auto start = timer.now(); + std::unique_lock lk(ctx.async_tasks.mtx); + // Wait (probably by sleeping) until all async tasks are completed or new TBB tasks are created. + // FIXME: Use TBB resumable tasks here to avoid blocking TBB thread + ctx.async_tasks.cv.wait(lk, [&]{return async_work_done() || !tbb_work_done() ;}); + + LOG_INFO(NULL, "Slept for " << duration_cast(timer.now() - start).count() << " ms \n"); + } + } + while(!tbb_work_done() || !async_work_done()); + + ASSERT(tbb_work_done() && async_work_done() && "Graph is still running?"); + } + ); + + LOG_INFO(NULL, "Done. 
Executed " << ctx.executed << " tasks"); +} + +std::ostream& cv::gimpl::parallel::operator<<(std::ostream& o, tile_node const& n) { + o << "(" + << " at:" << &n << "," + << "indx: " << n.total_order_index << "," + << "deps #:" << n.dependency_count.value << ", " + << "prods:" << n.dependants.size(); + + o << "["; + for (auto* d: n.dependants) { + o << d << ","; + } + o << "]"; + + o << ")"; + return o; +} + +#endif // HAVE_TBB diff --git a/modules/gapi/src/executor/gtbbexecutor.hpp b/modules/gapi/src/executor/gtbbexecutor.hpp new file mode 100644 index 0000000000..8a62266f66 --- /dev/null +++ b/modules/gapi/src/executor/gtbbexecutor.hpp @@ -0,0 +1,103 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_TBB_EXECUTOR_HPP +#define OPENCV_GAPI_TBB_EXECUTOR_HPP + +#if !defined(GAPI_STANDALONE) +#include +#endif + +#if defined(HAVE_TBB) + +#include +#include +#include +#include + +#include +#include + +#include + +namespace cv { namespace gimpl { namespace parallel { + +// simple wrapper to allow copies of std::atomic +template +struct atomic_copyable_wrapper { + std::atomic value; + + atomic_copyable_wrapper(count_t val) : value(val) {} + atomic_copyable_wrapper(atomic_copyable_wrapper const& lhs) : value (lhs.value.load(std::memory_order_relaxed)) {} + + atomic_copyable_wrapper& operator=(count_t val) { + value.store(val, std::memory_order_relaxed); + return *this; + } + + count_t fetch_sub(count_t val) { + return value.fetch_sub(val); + } + + count_t fetch_add(count_t val) { + return value.fetch_add(val); + } +}; + +struct async_tag {}; +constexpr async_tag async; + +// Class describing a piece of work in the node in the tasks graph. +// Most of the fields are set only once during graph compilation and never changes. 
+// (However at the moment they can not be made const due to two phase initialization +// of the tile_node objects) +// FIXME: refactor the code to make the const? +struct tile_node { + // place in totally ordered queue of tasks to execute. Inverse to priority, i.e. + // lower index means higher priority + size_t total_order_index = 0; + + // FIXME: use templates here instead of std::function + struct sync_task_body { + std::function body; + }; + struct async_task_body { + std::function&& callback, size_t total_order_index)> body; + }; + + util::variant task_body; + + // number of dependencies according to a dependency graph (i.e. number of "input" edges). + size_t dependencies = 0; + + // number of unsatisfied dependencies. When drops to zero task is ready for execution. + // Initially equal to "dependencies" + atomic_copyable_wrapper dependency_count = 0; + + std::vector dependants; + + tile_node(decltype(sync_task_body::body)&& f) : task_body(sync_task_body{std::move(f)}) {}; + tile_node(async_tag, decltype(async_task_body::body)&& f) : task_body(async_task_body{std::move(f)}) {}; +}; + +std::ostream& operator<<(std::ostream& o, tile_node const& n); + +struct tile_node_indirect_priority_comparator { + bool operator()(tile_node const * lhs, tile_node const * rhs) const { + return lhs->total_order_index > rhs->total_order_index; + } +}; + +using prio_items_queue_t = tbb::concurrent_priority_queue; + +void execute(prio_items_queue_t& q); +void execute(prio_items_queue_t& q, tbb::task_arena& arena); + +}}} // namespace cv::gimpl::parallel + +#endif // HAVE_TBB + +#endif // OPENCV_GAPI_TBB_EXECUTOR_HPP diff --git a/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp b/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp new file mode 100644 index 0000000000..d793683f94 --- /dev/null +++ b/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp @@ -0,0 +1,172 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +// Deliberately include .cpp file instead of header as we use non exported function (execute) +#include + +#if defined(HAVE_TBB) + +#include "../test_precomp.hpp" +#include +#include + +namespace { + tbb::task_arena create_task_arena(int max_concurrency = tbb::task_arena::automatic /* set to 1 for single thread */) { + unsigned int reserved_for_master_threads = 1; + if (max_concurrency == 1) { + // Leave no room for TBB worker threads, by reserving all to masters. + // TBB runtime guarantees that no worker threads will join the arena + // if max_concurrency is equal to reserved_for_master_threads + // except 1:1 + use of enqueued tasks for safety guarantee. + // So deliberately make it 2:2 to force TBB not to create extra thread. + // + // N.B. one slot will left empty as only one master thread(one that + // calls root->wait_for_all()) will join the arena. + + // FIXME: strictly speaking master can take any free slot, not the first one. + // However at the moment master seems to pick 0 slot all the time. 
+ max_concurrency = 2; + reserved_for_master_threads = 2; + } + return tbb::task_arena{max_concurrency, reserved_for_master_threads}; + } +} + +namespace opencv_test { + +TEST(TBBExecutor, Basic) { + using namespace cv::gimpl::parallel; + bool executed = false; + prio_items_queue_t q; + tile_node n([&]() { + executed = true; + }); + q.push(&n); + execute(q); + EXPECT_EQ(true, executed); +} + +TEST(TBBExecutor, SerialExecution) { + using namespace cv::gimpl::parallel; + const int n = 10; + prio_items_queue_t q; + std::vector nodes; nodes.reserve(n+1); + std::vector thread_id(n); + for (int i=0; i (std::count(thread_id.begin(), thread_id.end(), thread_id[0]))) + << print_thread_ids(); +} + +TEST(TBBExecutor, AsyncBasic) { + using namespace cv::gimpl::parallel; + + std::atomic callback_ready {false}; + std::function callback; + + std::atomic callback_called {false}; + std::atomic master_is_waiting {true}; + std::atomic master_was_blocked_until_callback_called {false}; + + auto async_thread = std::thread([&] { + bool slept = false; + while (!callback_ready) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + slept = true; + } + if (!slept) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + callback(); + callback_called = true; + master_was_blocked_until_callback_called = (master_is_waiting == true); + }); + + auto async_task_body = [&](std::function&& cb, size_t /*total_order_index*/) { + callback = std::move(cb); + callback_ready = true; + }; + tile_node n(async, std::move(async_task_body)); + + prio_items_queue_t q; + q.push(&n); + execute(q); + master_is_waiting = false; + + async_thread.join(); + + EXPECT_EQ(true, callback_called); + EXPECT_EQ(true, master_was_blocked_until_callback_called); +} + +TEST(TBBExecutor, Dependencies) { + using namespace cv::gimpl::parallel; + const int n = 10; + bool serial = true; + std::atomic counter {0}; + prio_items_queue_t q; + std::vector nodes; nodes.reserve(n+1); + const int invalid_order = -10; + 
std::vector tiles_exec_order(n, invalid_order); + + auto add_dependency_to = [](tile_node& node, tile_node& dependency) { + dependency.dependants.push_back(&node); + node.dependencies++; + node.dependency_count.fetch_add(1); + }; + for (int i=0; i 0) { + auto last_node = nodes.end() - 1; + add_dependency_to(*last_node, *(last_node -1)); + } + } + + q.push(&nodes.front()); + + auto arena = serial ? create_task_arena(1) : create_task_arena(); + execute(q, arena); + auto print_execution_order = [&] { + std::stringstream str; + for (auto& i : tiles_exec_order) { str << i <<" ";} + return str.str(); + }; + ASSERT_EQ(0, std::count(tiles_exec_order.begin(), tiles_exec_order.end(), invalid_order)) + << "Not all " << n << " task executed ?\n" + <<" execution order : " << print_execution_order(); + + for (size_t i=0; i Date: Mon, 30 Nov 2020 16:18:43 +0300 Subject: [PATCH 008/422] Merge pull request #18857 from OrestChura:oc/kmeans [G-API]: kmeans() Standard Kernel Implementation * cv::gapi::kmeans kernel implementation - 4 overloads: - standard GMat - for any dimensionality - GMat without bestLabels initialization - GArray - for 2D - GArray - for 3D - Accuracy tests: - for every input - 2 tests 1) without initializing. 
In this case, no comparison with cv::kmeans is done as kmeans uses random auto-initialization 2) with initialization - in both cases, only 1 attempt is done as after first attempt kmeans initializes bestLabels randomly * Addressing comments - bestLabels is returned to its original place among parameters - checkVector and isPointsVector functions are merged into one, shared between core.hpp & imgproc.hpp by placing it into gmat.hpp (and implementation - to gmat.cpp) - typos corrected * addressing comments - unified names in tests - const added - typos * Addressing comments - fixed the doc note - ddepth -> expectedDepth, `< 0 ` -> `== -1` * Fix unsupported cases of input Mat - supported: multiple channels, reversed width - added test cases for those - added notes in docs - refactored checkVector to return dimentionality along with quantity * Addressing comments - makes chackVector smaller and (maybe) clearer * Addressing comments * Addressing comments - cv::checkVector -> cv::gapi::detail * Addressing comments - Changed checkVector: returns bool, quantity & dimensionality as references * Addressing comments - Polishing checkVector - FIXME added * Addressing discussion - checkVector: added overload, separate two different functionalities - depth assert - out of the function * Addressing comments - quantity -> amount, dimensionality -> dim - Fix typos * Addressing comments - fix docs - use 2 variable's definitions instead of one (for all non-trivial variables) --- modules/gapi/include/opencv2/gapi/core.hpp | 145 +++++++++++++ modules/gapi/include/opencv2/gapi/gmat.hpp | 21 ++ modules/gapi/include/opencv2/gapi/imgproc.hpp | 32 +-- modules/gapi/src/api/gmat.cpp | 32 +++ modules/gapi/src/api/kernels_core.cpp | 34 ++++ modules/gapi/src/backends/cpu/gcpucore.cpp | 61 ++++++ modules/gapi/test/common/gapi_core_tests.hpp | 10 + .../gapi/test/common/gapi_core_tests_inl.hpp | 191 ++++++++++++++++++ .../gapi/test/common/gapi_tests_common.hpp | 22 ++ 
modules/gapi/test/cpu/gapi_core_tests_cpu.cpp | 66 ++++++ 10 files changed, 599 insertions(+), 15 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/core.hpp b/modules/gapi/include/opencv2/gapi/core.hpp index 8825585696..1e3eee86c3 100644 --- a/modules/gapi/include/opencv2/gapi/core.hpp +++ b/modules/gapi/include/opencv2/gapi/core.hpp @@ -26,6 +26,7 @@ @defgroup gapi_transform Graph API: Image and channel composition functions @} */ + namespace cv { namespace gapi { namespace core { using GMat2 = std::tuple; @@ -508,6 +509,77 @@ namespace core { return in.withType(in.depth, in.chan).withSize(dsize); } }; + + G_TYPED_KERNEL( + GKMeansND, + ,GMat,GMat>(GMat,int,GMat,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeansND") { + + static std::tuple + outMeta(const GMatDesc& in, int K, const GMatDesc& bestLabels, const TermCriteria&, int, + KmeansFlags flags) { + GAPI_Assert(in.depth == CV_32F); + std::vector amount_n_dim = detail::checkVector(in); + int amount = amount_n_dim[0], dim = amount_n_dim[1]; + if (amount == -1) // Mat with height != 1, width != 1, channels != 1 given + { // which means that kmeans will consider the following: + amount = in.size.height; + dim = in.size.width * in.chan; + } + // kmeans sets these labels' sizes when no bestLabels given: + GMatDesc out_labels(CV_32S, 1, Size{1, amount}); + // kmeans always sets these centers' sizes: + GMatDesc centers (CV_32F, 1, Size{dim, K}); + if (flags & KMEANS_USE_INITIAL_LABELS) + { + GAPI_Assert(bestLabels.depth == CV_32S); + int labels_amount = detail::checkVector(bestLabels, 1u); + GAPI_Assert(labels_amount == amount); + out_labels = bestLabels; // kmeans preserves bestLabels' sizes if given + } + return std::make_tuple(empty_gopaque_desc(), out_labels, centers); + } + }; + + G_TYPED_KERNEL( + GKMeansNDNoInit, + ,GMat,GMat>(GMat,int,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeansNDNoInit") { + + static std::tuple + outMeta(const GMatDesc& in, int K, const TermCriteria&, int, 
KmeansFlags flags) { + GAPI_Assert( !(flags & KMEANS_USE_INITIAL_LABELS) ); + GAPI_Assert(in.depth == CV_32F); + std::vector amount_n_dim = detail::checkVector(in); + int amount = amount_n_dim[0], dim = amount_n_dim[1]; + if (amount == -1) // Mat with height != 1, width != 1, channels != 1 given + { // which means that kmeans will consider the following: + amount = in.size.height; + dim = in.size.width * in.chan; + } + GMatDesc out_labels(CV_32S, 1, Size{1, amount}); + GMatDesc centers (CV_32F, 1, Size{dim, K}); + return std::make_tuple(empty_gopaque_desc(), out_labels, centers); + } + }; + + G_TYPED_KERNEL(GKMeans2D, ,GArray,GArray> + (GArray,int,GArray,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeans2D") { + static std::tuple + outMeta(const GArrayDesc&,int,const GArrayDesc&,const TermCriteria&,int,KmeansFlags) { + return std::make_tuple(empty_gopaque_desc(), empty_array_desc(), empty_array_desc()); + } + }; + + G_TYPED_KERNEL(GKMeans3D, ,GArray,GArray> + (GArray,int,GArray,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeans3D") { + static std::tuple + outMeta(const GArrayDesc&,int,const GArrayDesc&,const TermCriteria&,int,KmeansFlags) { + return std::make_tuple(empty_gopaque_desc(), empty_array_desc(), empty_array_desc()); + } + }; } // namespace core namespace streaming { @@ -1757,6 +1829,79 @@ GAPI_EXPORTS GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, i int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar()); //! @} gapi_transform +/** @brief Finds centers of clusters and groups input samples around the clusters. + +The function kmeans implements a k-means algorithm that finds the centers of K clusters +and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$ +contains a 0-based cluster index for the \f$i^{th}\f$ sample. 
+ +@note + - Function textual ID is "org.opencv.core.kmeansND" + - In case of an N-dimentional points' set given, input GMat can have the following traits: +2 dimensions, a single row or column if there are N channels, +or N columns if there is a single channel. Mat should have @ref CV_32F depth. + - Although, if GMat with height != 1, width != 1, channels != 1 given as data, n-dimensional +samples are considered given in amount of A, where A = height, n = width * channels. + - In case of GMat given as data: + - the output labels are returned as 1-channel GMat with sizes +width = 1, height = A, where A is samples amount, or width = bestLabels.width, +height = bestLabels.height if bestLabels given; + - the cluster centers are returned as 1-channel GMat with sizes +width = n, height = K, where n is samples' dimentionality and K is clusters' amount. + - As one of possible usages, if you want to control the initial labels for each attempt +by yourself, you can utilize just the core of the function. To do that, set the number +of attempts to 1, initialize labels each time using a custom algorithm, pass them with the +( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best (most-compact) clustering. + +@param data Data for clustering. An array of N-Dimensional points with float coordinates is needed. +Function can take GArray, GArray for 2D and 3D cases or GMat for any +dimentionality and channels. +@param K Number of clusters to split the set by. +@param bestLabels Optional input integer array that can store the supposed initial cluster indices +for every sample. Used when ( flags = #KMEANS_USE_INITIAL_LABELS ) flag is set. +@param criteria The algorithm termination criteria, that is, the maximum number of iterations +and/or the desired accuracy. The accuracy is specified as criteria.epsilon. As soon as each of +the cluster centers moves by less than criteria.epsilon on some iteration, the algorithm stops. 
+@param attempts Flag to specify the number of times the algorithm is executed using different +initial labellings. The algorithm returns the labels that yield the best compactness (see the first +function return value). +@param flags Flag that can take values of cv::KmeansFlags . + +@return + - Compactness measure that is computed as +\f[\sum _i \| \texttt{samples} _i - \texttt{centers} _{ \texttt{labels} _i} \| ^2\f] +after every attempt. The best (minimum) value is chosen and the corresponding labels and the +compactness value are returned by the function. + - Integer array that stores the cluster indices for every sample. + - Array of the cluster centers. +*/ +GAPI_EXPORTS std::tuple,GMat,GMat> +kmeans(const GMat& data, const int K, const GMat& bestLabels, + const TermCriteria& criteria, const int attempts, const KmeansFlags flags); + +/** @overload +@note + - Function textual ID is "org.opencv.core.kmeansNDNoInit" + - #KMEANS_USE_INITIAL_LABELS flag must not be set while using this overload. + */ +GAPI_EXPORTS std::tuple,GMat,GMat> +kmeans(const GMat& data, const int K, const TermCriteria& criteria, const int attempts, + const KmeansFlags flags); + +/** @overload +@note Function textual ID is "org.opencv.core.kmeans2D" + */ +GAPI_EXPORTS std::tuple,GArray,GArray> +kmeans(const GArray& data, const int K, const GArray& bestLabels, + const TermCriteria& criteria, const int attempts, const KmeansFlags flags); + +/** @overload +@note Function textual ID is "org.opencv.core.kmeans3D" + */ +GAPI_EXPORTS std::tuple,GArray,GArray> +kmeans(const GArray& data, const int K, const GArray& bestLabels, + const TermCriteria& criteria, const int attempts, const KmeansFlags flags); + namespace streaming { /** @brief Gets dimensions from Mat. 
diff --git a/modules/gapi/include/opencv2/gapi/gmat.hpp b/modules/gapi/include/opencv2/gapi/gmat.hpp index f441413be5..20a10db92b 100644 --- a/modules/gapi/include/opencv2/gapi/gmat.hpp +++ b/modules/gapi/include/opencv2/gapi/gmat.hpp @@ -203,6 +203,27 @@ struct GAPI_EXPORTS GMatDesc static inline GMatDesc empty_gmat_desc() { return GMatDesc{-1,-1,{-1,-1}}; } +namespace gapi { namespace detail { +/** Checks GMatDesc fields if the passed matrix is a set of n-dimentional points. +@param in GMatDesc to check. +@param n expected dimensionality. +@return the amount of points. In case input matrix can't be described as vector of points +of expected dimensionality, returns -1. + */ +int checkVector(const GMatDesc& in, const size_t n); + +/** @overload + +Checks GMatDesc fields if the passed matrix can be described as a set of points of any +dimensionality. + +@return array of two elements in form of std::vector: the amount of points +and their calculated dimensionality. In case input matrix can't be described as vector of points, +returns {-1, -1}. 
+ */ +std::vector checkVector(const GMatDesc& in); +}} // namespace gapi::detail + #if !defined(GAPI_STANDALONE) GAPI_EXPORTS GMatDesc descr_of(const cv::UMat &mat); #endif // !defined(GAPI_STANDALONE) diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp index 7435ec1e1d..46b53c0416 100644 --- a/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp @@ -43,15 +43,6 @@ void validateFindingContoursMeta(const int depth, const int chan, const int mode break; } } - -// Checks if the passed mat is a set of n-dimentional points of the given depth -bool isPointsVector(const int chan, const cv::Size &size, const int depth, - const int n, const int ddepth = -1) -{ - return (ddepth == depth || ddepth < 0) && - ((chan == n && (size.height == 1 || size.width == 1)) || - (chan == 1 && size.width == n)); -} } // anonymous namespace namespace cv { namespace gapi { @@ -212,10 +203,17 @@ namespace imgproc { G_TYPED_KERNEL(GBoundingRectMat, (GMat)>, "org.opencv.imgproc.shape.boundingRectMat") { static GOpaqueDesc outMeta(GMatDesc in) { - GAPI_Assert((in.depth == CV_8U && in.chan == 1) || - (isPointsVector(in.chan, in.size, in.depth, 2, CV_32S) || - isPointsVector(in.chan, in.size, in.depth, 2, CV_32F))); - + if (in.depth == CV_8U) + { + GAPI_Assert(in.chan == 1); + } + else + { + GAPI_Assert (in.depth == CV_32S || in.depth == CV_32F); + int amount = detail::checkVector(in, 2u); + GAPI_Assert(amount != -1 && + "Input Mat can't be described as vector of 2-dimentional points"); + } return empty_gopaque_desc(); } }; @@ -237,7 +235,9 @@ namespace imgproc { G_TYPED_KERNEL(GFitLine2DMat, (GMat,DistanceTypes,double,double,double)>, "org.opencv.imgproc.shape.fitLine2DMat") { static GOpaqueDesc outMeta(GMatDesc in,DistanceTypes,double,double,double) { - GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 2, -1)); + int amount = detail::checkVector(in, 2u); + GAPI_Assert(amount != -1 && + "Input 
Mat can't be described as vector of 2-dimentional points"); return empty_gopaque_desc(); } }; @@ -269,7 +269,9 @@ namespace imgproc { G_TYPED_KERNEL(GFitLine3DMat, (GMat,DistanceTypes,double,double,double)>, "org.opencv.imgproc.shape.fitLine3DMat") { static GOpaqueDesc outMeta(GMatDesc in,int,double,double,double) { - GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 3, -1)); + int amount = detail::checkVector(in, 3u); + GAPI_Assert(amount != -1 && + "Input Mat can't be described as vector of 3-dimentional points"); return empty_gopaque_desc(); } }; diff --git a/modules/gapi/src/api/gmat.cpp b/modules/gapi/src/api/gmat.cpp index 08bb170a86..47a246c293 100644 --- a/modules/gapi/src/api/gmat.cpp +++ b/modules/gapi/src/api/gmat.cpp @@ -36,6 +36,38 @@ const cv::GOrigin& cv::GMat::priv() const return *m_priv; } +static std::vector checkVectorImpl(const int width, const int height, const int chan, + const int n) +{ + if (width == 1 && (n == -1 || n == chan)) + { + return {height, chan}; + } + else if (height == 1 && (n == -1 || n == chan)) + { + return {width, chan}; + } + else if (chan == 1 && (n == -1 || n == width)) + { + return {height, width}; + } + else // input Mat can't be described as vector of points of given dimensionality + { + return {-1, -1}; + } +} + +int cv::gapi::detail::checkVector(const cv::GMatDesc& in, const size_t n) +{ + GAPI_Assert(n != 0u); + return checkVectorImpl(in.size.width, in.size.height, in.chan, static_cast(n))[0]; +} + +std::vector cv::gapi::detail::checkVector(const cv::GMatDesc& in) +{ + return checkVectorImpl(in.size.width, in.size.height, in.chan, -1); +} + namespace{ template cv::GMetaArgs vec_descr_of(const std::vector &vec) { diff --git a/modules/gapi/src/api/kernels_core.cpp b/modules/gapi/src/api/kernels_core.cpp index 82aceb1f26..15af915bdd 100644 --- a/modules/gapi/src/api/kernels_core.cpp +++ b/modules/gapi/src/api/kernels_core.cpp @@ -388,6 +388,40 @@ GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, 
int flags, return core::GWarpAffine::on(src, M, dsize, flags, borderMode, borderValue); } +std::tuple,GMat,GMat> kmeans(const GMat& data, const int K, const GMat& bestLabels, + const TermCriteria& criteria, const int attempts, + const KmeansFlags flags) +{ + return core::GKMeansND::on(data, K, bestLabels, criteria, attempts, flags); +} + +std::tuple,GMat,GMat> kmeans(const GMat& data, const int K, + const TermCriteria& criteria, const int attempts, + const KmeansFlags flags) +{ + return core::GKMeansNDNoInit::on(data, K, criteria, attempts, flags); +} + +std::tuple,GArray,GArray> kmeans(const GArray& data, + const int K, + const GArray& bestLabels, + const TermCriteria& criteria, + const int attempts, + const KmeansFlags flags) +{ + return core::GKMeans2D::on(data, K, bestLabels, criteria, attempts, flags); +} + +std::tuple,GArray,GArray> kmeans(const GArray& data, + const int K, + const GArray& bestLabels, + const TermCriteria& criteria, + const int attempts, + const KmeansFlags flags) +{ + return core::GKMeans3D::on(data, K, bestLabels, criteria, attempts, flags); +} + GOpaque streaming::size(const GMat& src) { return streaming::GSize::on(src); diff --git a/modules/gapi/src/backends/cpu/gcpucore.cpp b/modules/gapi/src/backends/cpu/gcpucore.cpp index fc460149c6..3e6ce1c1d4 100644 --- a/modules/gapi/src/backends/cpu/gcpucore.cpp +++ b/modules/gapi/src/backends/cpu/gcpucore.cpp @@ -585,6 +585,63 @@ GAPI_OCV_KERNEL(GCPUWarpAffine, cv::gapi::core::GWarpAffine) } }; +GAPI_OCV_KERNEL(GCPUKMeansND, cv::gapi::core::GKMeansND) +{ + static void run(const cv::Mat& data, const int K, const cv::Mat& inBestLabels, + const cv::TermCriteria& criteria, const int attempts, + const cv::KmeansFlags flags, + double& compactness, cv::Mat& outBestLabels, cv::Mat& centers) + { + if (flags & cv::KMEANS_USE_INITIAL_LABELS) + { + inBestLabels.copyTo(outBestLabels); + } + compactness = cv::kmeans(data, K, outBestLabels, criteria, attempts, flags, centers); + } +}; + 
+GAPI_OCV_KERNEL(GCPUKMeansNDNoInit, cv::gapi::core::GKMeansNDNoInit) +{ + static void run(const cv::Mat& data, const int K, const cv::TermCriteria& criteria, + const int attempts, const cv::KmeansFlags flags, + double& compactness, cv::Mat& outBestLabels, cv::Mat& centers) + { + compactness = cv::kmeans(data, K, outBestLabels, criteria, attempts, flags, centers); + } +}; + +GAPI_OCV_KERNEL(GCPUKMeans2D, cv::gapi::core::GKMeans2D) +{ + static void run(const std::vector& data, const int K, + const std::vector& inBestLabels, const cv::TermCriteria& criteria, + const int attempts, const cv::KmeansFlags flags, + double& compactness, std::vector& outBestLabels, + std::vector& centers) + { + if (flags & cv::KMEANS_USE_INITIAL_LABELS) + { + outBestLabels = inBestLabels; + } + compactness = cv::kmeans(data, K, outBestLabels, criteria, attempts, flags, centers); + } +}; + +GAPI_OCV_KERNEL(GCPUKMeans3D, cv::gapi::core::GKMeans3D) +{ + static void run(const std::vector& data, const int K, + const std::vector& inBestLabels, const cv::TermCriteria& criteria, + const int attempts, const cv::KmeansFlags flags, + double& compactness, std::vector& outBestLabels, + std::vector& centers) + { + if (flags & cv::KMEANS_USE_INITIAL_LABELS) + { + outBestLabels = inBestLabels; + } + compactness = cv::kmeans(data, K, outBestLabels, criteria, attempts, flags, centers); + } +}; + GAPI_OCV_KERNEL(GCPUParseSSDBL, cv::gapi::nn::parsers::GParseSSDBL) { static void run(const cv::Mat& in_ssd_result, @@ -714,6 +771,10 @@ cv::gapi::GKernelPackage cv::gapi::core::cpu::kernels() , GCPUNormalize , GCPUWarpPerspective , GCPUWarpAffine + , GCPUKMeansND + , GCPUKMeansNDNoInit + , GCPUKMeans2D + , GCPUKMeans3D , GCPUParseSSDBL , GOCVParseSSD , GCPUParseYolo diff --git a/modules/gapi/test/common/gapi_core_tests.hpp b/modules/gapi/test/common/gapi_core_tests.hpp index 889e32f1c1..48ac4482a7 100644 --- a/modules/gapi/test/common/gapi_core_tests.hpp +++ b/modules/gapi/test/common/gapi_core_tests.hpp @@ -151,6 
+151,16 @@ GAPI_TEST_FIXTURE(WarpPerspectiveTest, initMatrixRandU, GAPI_TEST_FIXTURE(WarpAffineTest, initMatrixRandU, FIXTURE_API(CompareMats, double , double, int, int, cv::Scalar), 6, cmpF, angle, scale, flags, border_mode, border_value) +GAPI_TEST_FIXTURE(KMeansNDNoInitTest, initMatrixRandU, FIXTURE_API(int, cv::KmeansFlags), + 2, K, flags) +GAPI_TEST_FIXTURE(KMeansNDInitTest, initMatrixRandU, + FIXTURE_API(CompareMats, int, cv::KmeansFlags), 3, cmpF, K, flags) +GAPI_TEST_FIXTURE(KMeans2DNoInitTest, initNothing, FIXTURE_API(int, cv::KmeansFlags), + 2, K, flags) +GAPI_TEST_FIXTURE(KMeans2DInitTest, initNothing, FIXTURE_API(int, cv::KmeansFlags), 2, K, flags) +GAPI_TEST_FIXTURE(KMeans3DNoInitTest, initNothing, FIXTURE_API(int, cv::KmeansFlags), + 2, K, flags) +GAPI_TEST_FIXTURE(KMeans3DInitTest, initNothing, FIXTURE_API(int, cv::KmeansFlags), 2, K, flags) GAPI_TEST_EXT_BASE_FIXTURE(ParseSSDBLTest, ParserSSDTest, initNothing, FIXTURE_API(float, int), 2, confidence_threshold, filter_label) diff --git a/modules/gapi/test/common/gapi_core_tests_inl.hpp b/modules/gapi/test/common/gapi_core_tests_inl.hpp index 045b556369..0e2d5ad474 100644 --- a/modules/gapi/test/common/gapi_core_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_core_tests_inl.hpp @@ -15,6 +15,16 @@ namespace opencv_test { +namespace +{ +template +inline bool compareVectorsAbsExact(const std::vector& outOCV, + const std::vector& outGAPI) +{ + return AbsExactVector().to_compare_f()(outOCV, outGAPI); +} +} + TEST_P(MathOpTest, MatricesAccuracyTest) { // G-API code & corresponding OpenCV code //////////////////////////////// @@ -1377,6 +1387,187 @@ TEST_P(NormalizeTest, Test) } } +TEST_P(KMeansNDNoInitTest, AccuracyTest) +{ + const int amount = sz.height != 1 ? sz.height : sz.width, + dim = sz.height != 1 ? 
sz.width : (type >> CV_CN_SHIFT) + 1; + // amount of channels + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + double compact_gapi = -1.; + cv::Mat labels_gapi, centers_gapi; + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in; + cv::GOpaque compactness; + cv::GMat outLabels, centers; + std::tie(compactness, outLabels, centers) = cv::gapi::kmeans(in, K, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in), cv::GOut(compactness, outLabels, centers)); + c.apply(cv::gin(in_mat1), cv::gout(compact_gapi, labels_gapi, centers_gapi), getCompileArgs()); + // Validation ////////////////////////////////////////////////////////////// + { + EXPECT_GE(compact_gapi, 0.); + EXPECT_EQ(labels_gapi.cols, 1); + EXPECT_EQ(labels_gapi.rows, amount); + EXPECT_FALSE(labels_gapi.empty()); + EXPECT_EQ(centers_gapi.cols, dim); + EXPECT_EQ(centers_gapi.rows, K); + EXPECT_FALSE(centers_gapi.empty()); + } +} + +TEST_P(KMeansNDInitTest, AccuracyTest) +{ + const int amount = sz.height != 1 ? 
sz.height : sz.width; + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + cv::Mat bestLabels(cv::Size{1, amount}, CV_32SC1); + double compact_ocv = -1., compact_gapi = -1.; + cv::Mat labels_ocv, labels_gapi, centers_ocv, centers_gapi; + cv::randu(bestLabels, 0, K); + bestLabels.copyTo(labels_ocv); + // G-API code ////////////////////////////////////////////////////////////// + cv::GMat in, inLabels; + cv::GOpaque compactness; + cv::GMat outLabels, centers; + std::tie(compactness, outLabels, centers) = + cv::gapi::kmeans(in, K, inLabels, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in, inLabels), cv::GOut(compactness, outLabels, centers)); + c.apply(cv::gin(in_mat1, bestLabels), cv::gout(compact_gapi, labels_gapi, centers_gapi), + getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + compact_ocv = cv::kmeans(in_mat1, K, labels_ocv, criteria, attempts, flags, centers_ocv); + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(compact_gapi == compact_ocv); + EXPECT_TRUE(cmpF(labels_gapi, labels_ocv)); + EXPECT_TRUE(cmpF(centers_gapi, centers_ocv)); + } +} + +TEST_P(KMeans2DNoInitTest, AccuracyTest) +{ + const int amount = sz.height; + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + std::vector in_vector{}; + double compact_gapi = -1.; + std::vector labels_gapi{}; + std::vector centers_gapi{}; + initPointsVectorRandU(amount, in_vector); + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + cv::GArray inLabels(std::vector{}); + cv::GOpaque compactness; + cv::GArray outLabels; + cv::GArray centers; + std::tie(compactness, outLabels, centers) = + cv::gapi::kmeans(in, K, inLabels, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in), cv::GOut(compactness, outLabels, centers)); + 
c.apply(cv::gin(in_vector), cv::gout(compact_gapi, labels_gapi, centers_gapi), getCompileArgs()); + // Validation ////////////////////////////////////////////////////////////// + { + EXPECT_GE(compact_gapi, 0.); + EXPECT_EQ(labels_gapi.size(), static_cast(amount)); + EXPECT_EQ(centers_gapi.size(), static_cast(K)); + } +} + +TEST_P(KMeans2DInitTest, AccuracyTest) +{ + const int amount = sz.height; + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + std::vector in_vector{}; + std::vector bestLabels(amount); + double compact_ocv = -1., compact_gapi = -1.; + std::vector labels_ocv{}, labels_gapi{}; + std::vector centers_ocv{}, centers_gapi{}; + initPointsVectorRandU(amount, in_vector); + cv::randu(bestLabels, 0, K); + labels_ocv = bestLabels; + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + cv::GArray inLabels; + cv::GOpaque compactness; + cv::GArray outLabels; + cv::GArray centers; + std::tie(compactness, outLabels, centers) = + cv::gapi::kmeans(in, K, inLabels, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in, inLabels), cv::GOut(compactness, outLabels, centers)); + c.apply(cv::gin(in_vector, bestLabels), cv::gout(compact_gapi, labels_gapi, centers_gapi), + getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + compact_ocv = cv::kmeans(in_vector, K, labels_ocv, criteria, attempts, flags, centers_ocv); + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(compact_gapi == compact_ocv); + EXPECT_TRUE(compareVectorsAbsExact(labels_gapi, labels_ocv)); + EXPECT_TRUE(compareVectorsAbsExact(centers_gapi, centers_ocv)); + } +} + +TEST_P(KMeans3DNoInitTest, AccuracyTest) +{ + const int amount = sz.height; + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + std::vector in_vector{}; + double compact_gapi = -1.; 
+ std::vector labels_gapi{}; + std::vector centers_gapi{}; + initPointsVectorRandU(amount, in_vector); + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + cv::GArray inLabels(std::vector{}); + cv::GOpaque compactness; + cv::GArray outLabels; + cv::GArray centers; + std::tie(compactness, outLabels, centers) = + cv::gapi::kmeans(in, K, inLabels, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in), cv::GOut(compactness, outLabels, centers)); + c.apply(cv::gin(in_vector), cv::gout(compact_gapi, labels_gapi, centers_gapi), getCompileArgs()); + // Validation ////////////////////////////////////////////////////////////// + { + EXPECT_GE(compact_gapi, 0.); + EXPECT_EQ(labels_gapi.size(), static_cast(amount)); + EXPECT_EQ(centers_gapi.size(), static_cast(K)); + } +} + +TEST_P(KMeans3DInitTest, AccuracyTest) +{ + const int amount = sz.height; + const cv::TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 30, 0); + const int attempts = 1; + std::vector in_vector{}; + std::vector bestLabels(amount); + double compact_ocv = -1., compact_gapi = -1.; + std::vector labels_ocv{}, labels_gapi{}; + std::vector centers_ocv{}, centers_gapi{}; + initPointsVectorRandU(amount, in_vector); + cv::randu(bestLabels, 0, K); + labels_ocv = bestLabels; + // G-API code ////////////////////////////////////////////////////////////// + cv::GArray in; + cv::GArray inLabels; + cv::GOpaque compactness; + cv::GArray outLabels; + cv::GArray centers; + std::tie(compactness, outLabels, centers) = + cv::gapi::kmeans(in, K, inLabels, criteria, attempts, flags); + cv::GComputation c(cv::GIn(in, inLabels), cv::GOut(compactness, outLabels, centers)); + c.apply(cv::gin(in_vector, bestLabels), cv::gout(compact_gapi, labels_gapi, centers_gapi), + getCompileArgs()); + // OpenCV code ///////////////////////////////////////////////////////////// + compact_ocv = cv::kmeans(in_vector, K, labels_ocv, criteria, attempts, flags, centers_ocv); + // 
Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_TRUE(compact_gapi == compact_ocv); + EXPECT_TRUE(compareVectorsAbsExact(labels_gapi, labels_ocv)); + EXPECT_TRUE(compareVectorsAbsExact(centers_gapi, centers_ocv)); + } +} + // PLEASE DO NOT PUT NEW ACCURACY TESTS BELOW THIS POINT! ////////////////////// TEST_P(BackendOutputAllocationTest, EmptyOutput) diff --git a/modules/gapi/test/common/gapi_tests_common.hpp b/modules/gapi/test/common/gapi_tests_common.hpp index 514fa2be38..6d11881372 100644 --- a/modules/gapi/test/common/gapi_tests_common.hpp +++ b/modules/gapi/test/common/gapi_tests_common.hpp @@ -1174,6 +1174,28 @@ inline std::ostream& operator<<(std::ostream& os, DistanceTypes op) #undef CASE return os; } + +inline std::ostream& operator<<(std::ostream& os, KmeansFlags op) +{ + int op_(op); + switch (op_) + { + case KmeansFlags::KMEANS_RANDOM_CENTERS: + os << "KMEANS_RANDOM_CENTERS"; + break; + case KmeansFlags::KMEANS_PP_CENTERS: + os << "KMEANS_PP_CENTERS"; + break; + case KmeansFlags::KMEANS_RANDOM_CENTERS | KmeansFlags::KMEANS_USE_INITIAL_LABELS: + os << "KMEANS_RANDOM_CENTERS | KMEANS_USE_INITIAL_LABELS"; + break; + case KmeansFlags::KMEANS_PP_CENTERS | KmeansFlags::KMEANS_USE_INITIAL_LABELS: + os << "KMEANS_PP_CENTERS | KMEANS_USE_INITIAL_LABELS"; + break; + default: GAPI_Assert(false && "unknown KmeansFlags value"); + } + return os; +} } // namespace cv #endif //OPENCV_GAPI_TESTS_COMMON_HPP diff --git a/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp b/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp index 595b63dd1f..fedc7c154f 100644 --- a/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp +++ b/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp @@ -484,6 +484,72 @@ INSTANTIATE_TEST_CASE_P(NormalizeTestCPU, NormalizeTest, Values(NORM_MINMAX, NORM_INF, NORM_L1, NORM_L2), Values(-1, CV_8U, CV_16U, CV_16S, CV_32F))); +INSTANTIATE_TEST_CASE_P(KMeansNDNoInitTestCPU, KMeansNDNoInitTest, + Combine(Values(CV_32FC1), + Values(cv::Size(2, 
20)), + Values(-1), + Values(CORE_CPU), + Values(5), + Values(cv::KMEANS_RANDOM_CENTERS, cv::KMEANS_PP_CENTERS))); + +INSTANTIATE_TEST_CASE_P(KMeansNDInitTestCPU, KMeansNDInitTest, + Combine(Values(CV_32FC1, CV_32FC3), + Values(cv::Size(1, 20), + cv::Size(2, 20), + cv::Size(5, 720)), + Values(-1), + Values(CORE_CPU), + Values(AbsTolerance(0.01).to_compare_obj()), + Values(5, 15), + Values(cv::KMEANS_RANDOM_CENTERS | cv::KMEANS_USE_INITIAL_LABELS, + cv::KMEANS_PP_CENTERS | cv::KMEANS_USE_INITIAL_LABELS))); + +INSTANTIATE_TEST_CASE_P(KMeansNDInitReverseTestCPU, KMeansNDInitTest, + Combine(Values(CV_32FC3), + Values(cv::Size(20, 1)), + Values(-1), + Values(CORE_CPU), + Values(AbsTolerance(0.01).to_compare_obj()), + Values(5, 15), + Values(cv::KMEANS_RANDOM_CENTERS | cv::KMEANS_USE_INITIAL_LABELS, + cv::KMEANS_PP_CENTERS | cv::KMEANS_USE_INITIAL_LABELS))); + +INSTANTIATE_TEST_CASE_P(KMeans2DNoInitTestCPU, KMeans2DNoInitTest, + Combine(Values(-1), + Values(cv::Size(-1, 20)), + Values(-1), + Values(CORE_CPU), + Values(5), + Values(cv::KMEANS_RANDOM_CENTERS, cv::KMEANS_PP_CENTERS))); + +INSTANTIATE_TEST_CASE_P(KMeans2DInitTestCPU, KMeans2DInitTest, + Combine(Values(-1), + Values(cv::Size(-1, 720), + cv::Size(-1, 20)), + Values(-1), + Values(CORE_CPU), + Values(5, 15), + Values(cv::KMEANS_RANDOM_CENTERS | cv::KMEANS_USE_INITIAL_LABELS, + cv::KMEANS_PP_CENTERS | cv::KMEANS_USE_INITIAL_LABELS))); + +INSTANTIATE_TEST_CASE_P(KMeans3DNoInitTestCPU, KMeans3DNoInitTest, + Combine(Values(-1), + Values(cv::Size(-1, 20)), + Values(-1), + Values(CORE_CPU), + Values(5), + Values(cv::KMEANS_RANDOM_CENTERS, cv::KMEANS_PP_CENTERS))); + +INSTANTIATE_TEST_CASE_P(KMeans3DInitTestCPU, KMeans3DInitTest, + Combine(Values(-1), + Values(cv::Size(-1, 720), + cv::Size(-1, 20)), + Values(-1), + Values(CORE_CPU), + Values(5, 15), + Values(cv::KMEANS_RANDOM_CENTERS | cv::KMEANS_USE_INITIAL_LABELS, + cv::KMEANS_PP_CENTERS | cv::KMEANS_USE_INITIAL_LABELS))); + // PLEASE DO NOT PUT NEW ACCURACY TESTS 
BELOW THIS POINT! ////////////////////// INSTANTIATE_TEST_CASE_P(BackendOutputAllocationTestCPU, BackendOutputAllocationTest, From 74b6646737662ba848ea39f2d69fafc19145b022 Mon Sep 17 00:00:00 2001 From: Anton Potapov Date: Mon, 30 Nov 2020 09:40:42 +0300 Subject: [PATCH 009/422] GAPI: reuse copy_through_move_t in the gasync.cpp file --- modules/gapi/src/executor/gasync.cpp | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/modules/gapi/src/executor/gasync.cpp b/modules/gapi/src/executor/gasync.cpp index b92dbdcec4..902e6e1624 100644 --- a/modules/gapi/src/executor/gasync.cpp +++ b/modules/gapi/src/executor/gasync.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include @@ -18,16 +20,6 @@ #include #include -namespace { - //This is a tool to move initialize captures of a lambda in C++11 - template - struct copy_through_move{ - T value; - copy_through_move(T&& g) : value(std::move(g)) {} - copy_through_move(copy_through_move&&) = default; - copy_through_move(copy_through_move const& lhs) : copy_through_move(std::move(const_cast(lhs))) {} - }; -} namespace cv { namespace gapi { @@ -168,7 +160,7 @@ const char* GAsyncCanceled::what() const noexcept { //For now these async functions are simply wrapping serial version of apply/operator() into a functor. //These functors are then serialized into single queue, which is processed by a devoted background thread. 
void async_apply(GComputation& gcomp, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args){ - //TODO: use copy_through_move for all args except gcomp + //TODO: use copy_through_move_t for all args except gcomp //TODO: avoid code duplication between versions of "async" functions auto l = [=]() mutable { auto apply_l = [&](){ @@ -181,7 +173,7 @@ void async_apply(GComputation& gcomp, std::function&& } std::future async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args){ - copy_through_move> prms{{}}; + util::copy_through_move_t> prms{{}}; auto f = prms.value.get_future(); auto l = [=]() mutable { auto apply_l = [&](){ @@ -196,7 +188,7 @@ std::future async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&o } void async_apply(GComputation& gcomp, std::function&& callback, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx){ - //TODO: use copy_through_move for all args except gcomp + //TODO: use copy_through_move_t for all args except gcomp auto l = [=, &ctx]() mutable { auto apply_l = [&](){ gcomp.apply(std::move(ins), std::move(outs), std::move(args)); @@ -208,7 +200,7 @@ void async_apply(GComputation& gcomp, std::function&& } std::future async_apply(GComputation& gcomp, GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args, GAsyncContext& ctx){ - copy_through_move> prms{{}}; + util::copy_through_move_t> prms{{}}; auto f = prms.value.get_future(); auto l = [=, &ctx]() mutable { auto apply_l = [&](){ @@ -248,7 +240,7 @@ void async(GCompiled& gcmpld, std::function&& callback } std::future async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs){ - copy_through_move> prms{{}}; + util::copy_through_move_t> prms{{}}; auto f = prms.value.get_future(); auto l = [=]() mutable { auto apply_l = [&](){ @@ -263,7 +255,7 @@ std::future async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs){ } std::future async(GCompiled& gcmpld, GRunArgs &&ins, GRunArgsP &&outs, GAsyncContext& ctx){ - 
copy_through_move> prms{{}}; + util::copy_through_move_t> prms{{}}; auto f = prms.value.get_future(); auto l = [=, &ctx]() mutable { auto apply_l = [&](){ From 24fac5f56d61e0caa757a99c0bbfbc9239d6ce7d Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 30 Nov 2020 11:54:51 +0300 Subject: [PATCH 010/422] Added test for VideoCapture CAP_PROP_FRAME_MSEC option. - Suppressed FFMPEG + h264, h265 as it does not pass tests with CI configuration. - Suppressed MediaFoundation backend as it always returns zero for now. --- modules/videoio/src/cap_mjpeg_decoder.cpp | 2 ++ modules/videoio/test/test_video_io.cpp | 30 +++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/modules/videoio/src/cap_mjpeg_decoder.cpp b/modules/videoio/src/cap_mjpeg_decoder.cpp index 116f118d28..a3c87812ac 100644 --- a/modules/videoio/src/cap_mjpeg_decoder.cpp +++ b/modules/videoio/src/cap_mjpeg_decoder.cpp @@ -116,6 +116,8 @@ double MotionJpegCapture::getProperty(int property) const { case CAP_PROP_POS_FRAMES: return (double)getFramePos(); + case CAP_PROP_POS_MSEC: + return (double)getFramePos() * (1000. / m_fps); case CAP_PROP_POS_AVI_RATIO: return double(getFramePos())/m_mjpeg_frames.size(); case CAP_PROP_FRAME_WIDTH: diff --git a/modules/videoio/test/test_video_io.cpp b/modules/videoio/test/test_video_io.cpp index 97b43ab68c..3f5617d8ce 100644 --- a/modules/videoio/test/test_video_io.cpp +++ b/modules/videoio/test/test_video_io.cpp @@ -231,6 +231,34 @@ public: else std::cout << "Frames counter is not available. Actual frames: " << count_actual << ". SKIP check." 
<< std::endl; } + + void doTimestampTest() + { + if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends())) + throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref)); + + if ((apiPref == CAP_MSMF) || ((apiPref == CAP_FFMPEG) && ((ext == "h264") || (ext == "h265")))) + throw SkipTestException(cv::String("Backend ") + cv::videoio_registry::getBackendName(apiPref) + + cv::String(" does not support CAP_PROP_POS_MSEC option")); + + VideoCapture cap; + EXPECT_NO_THROW(cap.open(video_file, apiPref)); + if (!cap.isOpened()) + throw SkipTestException(cv::String("Backend ") + cv::videoio_registry::getBackendName(apiPref) + + cv::String(" can't open the video: ") + video_file); + + Mat img; + for(int i = 0; i < 10; i++) + { + double timestamp = 0; + ASSERT_NO_THROW(cap >> img); + EXPECT_NO_THROW(timestamp = cap.get(CAP_PROP_POS_MSEC)); + const double frame_period = 1000.f/bunny_param.getFps(); + // NOTE: eps == frame_period, because videoCapture returns frame begining timestamp or frame end + // timestamp depending on codec and back-end. So the first frame has timestamp 0 or frame_period. 
+ EXPECT_NEAR(timestamp, i*frame_period, frame_period); + } + } }; //================================================================================================== @@ -367,6 +395,8 @@ TEST_P(Videoio_Bunny, read_position) { doTest(); } TEST_P(Videoio_Bunny, frame_count) { doFrameCountTest(); } +TEST_P(Videoio_Bunny, frame_timestamp) { doTimestampTest(); } + INSTANTIATE_TEST_CASE_P(videoio, Videoio_Bunny, testing::Combine( testing::ValuesIn(bunny_params), From 56568dae311acb67d1753ab0447d7c021538a9af Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Mon, 30 Nov 2020 21:09:42 +0300 Subject: [PATCH 011/422] Merge pull request #18674 from anna-khakimova:ak/backgroundSubtractor GAPI: New BackgroundSubtractor stateful kernel * New BackgroundSubtractorMOG2 kernel * Add BS parameters --- modules/gapi/include/opencv2/gapi/video.hpp | 91 +++++++++++++++++++ modules/gapi/src/api/kernels_video.cpp | 5 + modules/gapi/src/backends/cpu/gcpuvideo.cpp | 28 ++++++ modules/gapi/test/common/gapi_video_tests.hpp | 3 + .../test/common/gapi_video_tests_common.hpp | 29 ++++++ .../gapi/test/common/gapi_video_tests_inl.hpp | 44 +++++++++ .../gapi/test/cpu/gapi_video_tests_cpu.cpp | 14 +++ 7 files changed, 214 insertions(+) diff --git a/modules/gapi/include/opencv2/gapi/video.hpp b/modules/gapi/include/opencv2/gapi/video.hpp index 7f90134e6d..79236f377b 100644 --- a/modules/gapi/include/opencv2/gapi/video.hpp +++ b/modules/gapi/include/opencv2/gapi/video.hpp @@ -62,6 +62,73 @@ G_TYPED_KERNEL(GCalcOptFlowLKForPyr, return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc()); } }; + +enum BackgroundSubtractorType +{ + TYPE_BS_MOG2, + TYPE_BS_KNN +}; + +/** @brief Structure for the Background Subtractor operation's initialization parameters.*/ + +struct BackgroundSubtractorParams +{ + //! Type of the Background Subtractor operation. + BackgroundSubtractorType operation = TYPE_BS_MOG2; + + //! Length of the history. + int history = 500; + + //! 
For MOG2: Threshold on the squared Mahalanobis distance between the pixel + //! and the model to decide whether a pixel is well described by + //! the background model. + //! For KNN: Threshold on the squared distance between the pixel and the sample + //! to decide whether a pixel is close to that sample. + double threshold = 16; + + //! If true, the algorithm will detect shadows and mark them. + bool detectShadows = true; + + //! The value between 0 and 1 that indicates how fast + //! the background model is learnt. + //! Negative parameter value makes the algorithm use some automatically + //! chosen learning rate. + double learningRate = -1; + + //! default constructor + BackgroundSubtractorParams() {} + + /** Full constructor + @param op MOG2/KNN Background Subtractor type. + @param histLength Length of the history. + @param thrshld For MOG2: Threshold on the squared Mahalanobis distance between + the pixel and the model to decide whether a pixel is well described by the background model. + For KNN: Threshold on the squared distance between the pixel and the sample to decide + whether a pixel is close to that sample. + @param detect If true, the algorithm will detect shadows and mark them. It decreases the + speed a bit, so if you do not need this feature, set the parameter to false. + @param lRate The value between 0 and 1 that indicates how fast the background model is learnt. + Negative parameter value makes the algorithm to use some automatically chosen learning rate. 
+ */ + BackgroundSubtractorParams(BackgroundSubtractorType op, int histLength, + double thrshld, bool detect, double lRate) : operation(op), + history(histLength), + threshold(thrshld), + detectShadows(detect), + learningRate(lRate){} +}; + +G_TYPED_KERNEL(GBackgroundSubtractor, , + "org.opencv.video.BackgroundSubtractor") +{ + static GMatDesc outMeta(const GMatDesc& in, const BackgroundSubtractorParams& bsParams) + { + GAPI_Assert(bsParams.history >= 0); + GAPI_Assert(bsParams.learningRate <= 1); + return in.withType(CV_8U, 1); + } +}; + } //namespace video //! @addtogroup gapi_video @@ -169,8 +236,32 @@ calcOpticalFlowPyrLK(const GArray &prevPyr, int flags = 0, double minEigThresh = 1e-4); +/** @brief Gaussian Mixture-based or K-nearest neighbours-based Background/Foreground Segmentation Algorithm. +The operation generates a foreground mask. + +@return Output image is foreground mask, i.e. 8-bit unsigned 1-channel (binary) matrix @ref CV_8UC1. + +@note Functional textual ID is "org.opencv.video.BackgroundSubtractor" + +@param src input image: Floating point frame is used without scaling and should be in range [0,255]. +@param bsParams Set of initialization parameters for Background Subtractor kernel. +*/ +GAPI_EXPORTS GMat BackgroundSubtractor(const GMat& src, const cv::gapi::video::BackgroundSubtractorParams& bsParams); + //! 
@} gapi_video } //namespace gapi } //namespace cv + +namespace cv { namespace detail { +template<> struct CompileArgTag +{ + static const char* tag() + { + return "org.opencv.video.background_substractor_params"; + } +}; +} // namespace detail +} //namespace cv + #endif // OPENCV_GAPI_VIDEO_HPP diff --git a/modules/gapi/src/api/kernels_video.cpp b/modules/gapi/src/api/kernels_video.cpp index eff6d48874..b7c825f624 100644 --- a/modules/gapi/src/api/kernels_video.cpp +++ b/modules/gapi/src/api/kernels_video.cpp @@ -52,5 +52,10 @@ GOptFlowLKOutput calcOpticalFlowPyrLK(const cv::GArray &prevPyr, criteria, flags, minEigThresh); } +GMat BackgroundSubtractor(const GMat& src, const BackgroundSubtractorParams& bsp) +{ + return GBackgroundSubtractor::on(src, bsp); +} + } //namespace gapi } //namespace cv diff --git a/modules/gapi/src/backends/cpu/gcpuvideo.cpp b/modules/gapi/src/backends/cpu/gcpuvideo.cpp index ac8e9e4003..bc526d7bde 100644 --- a/modules/gapi/src/backends/cpu/gcpuvideo.cpp +++ b/modules/gapi/src/backends/cpu/gcpuvideo.cpp @@ -80,12 +80,40 @@ GAPI_OCV_KERNEL(GCPUCalcOptFlowLKForPyr, cv::gapi::video::GCalcOptFlowLKForPyr) } }; +GAPI_OCV_KERNEL_ST(GCPUBackgroundSubtractor, + cv::gapi::video::GBackgroundSubtractor, + cv::BackgroundSubtractor) +{ + static void setup(const cv::GMatDesc&, const cv::gapi::video::BackgroundSubtractorParams& bsParams, + std::shared_ptr& state, + const cv::GCompileArgs&) + { + if (bsParams.operation == cv::gapi::video::TYPE_BS_MOG2) + state = cv::createBackgroundSubtractorMOG2(bsParams.history, + bsParams.threshold, + bsParams.detectShadows); + else if (bsParams.operation == cv::gapi::video::TYPE_BS_KNN) + state = cv::createBackgroundSubtractorKNN(bsParams.history, + bsParams.threshold, + bsParams.detectShadows); + + GAPI_Assert(state); + } + + static void run(const cv::Mat& in, const cv::gapi::video::BackgroundSubtractorParams& bsParams, + cv::Mat &out, cv::BackgroundSubtractor& state) + { + state.apply(in, out, 
bsParams.learningRate); + } +}; + cv::gapi::GKernelPackage cv::gapi::video::cpu::kernels() { static auto pkg = cv::gapi::kernels < GCPUBuildOptFlowPyramid , GCPUCalcOptFlowLK , GCPUCalcOptFlowLKForPyr + , GCPUBackgroundSubtractor >(); return pkg; } diff --git a/modules/gapi/test/common/gapi_video_tests.hpp b/modules/gapi/test/common/gapi_video_tests.hpp index df57bf4a0f..ab12528259 100644 --- a/modules/gapi/test/common/gapi_video_tests.hpp +++ b/modules/gapi/test/common/gapi_video_tests.hpp @@ -28,6 +28,9 @@ GAPI_TEST_FIXTURE_SPEC_PARAMS(BuildPyr_CalcOptFlow_PipelineTest, FIXTURE_API(std::string,int,int,bool), 4, fileNamePattern, winSize, maxLevel, withDerivatives) +GAPI_TEST_FIXTURE_SPEC_PARAMS(BackgroundSubtractorTest, FIXTURE_API(tuple, + int, bool, double, std::string, std::size_t), + 6, typeAndThreshold, histLength, detectShadows, learningRate, filePath, testNumFrames) } // opencv_test diff --git a/modules/gapi/test/common/gapi_video_tests_common.hpp b/modules/gapi/test/common/gapi_video_tests_common.hpp index c12a817b2e..da26aa0089 100644 --- a/modules/gapi/test/common/gapi_video_tests_common.hpp +++ b/modules/gapi/test/common/gapi_video_tests_common.hpp @@ -321,6 +321,35 @@ inline GComputation runOCVnGAPIOptFlowPipeline(TestFunctional& testInst, return c; } +inline void testBackgroundSubtractorStreaming(cv::GStreamingCompiled& gapiBackSub, + const cv::Ptr& pOCVBackSub, + const int diffPercent, const int tolerance, + const double lRate, const std::size_t testNumFrames) +{ + cv::Mat frame, gapiForeground, ocvForeground; + double numDiff = diffPercent / 100.0; + + gapiBackSub.start(); + EXPECT_TRUE(gapiBackSub.running()); + + compare_f cmpF = AbsSimilarPoints(tolerance, numDiff).to_compare_f(); + + // Comparison of G-API and OpenCV substractors + std::size_t frames = 0u; + while (frames <= testNumFrames && gapiBackSub.pull(cv::gout(frame, gapiForeground))) + { + pOCVBackSub->apply(frame, ocvForeground, lRate); + EXPECT_TRUE(cmpF(gapiForeground, ocvForeground)); 
+ frames++; + } + + if (gapiBackSub.running()) + gapiBackSub.stop(); + + EXPECT_LT(0u, frames); + EXPECT_FALSE(gapiBackSub.running()); +} + #else // !HAVE_OPENCV_VIDEO inline cv::GComputation runOCVnGAPIBuildOptFlowPyramid(TestFunctional&, diff --git a/modules/gapi/test/common/gapi_video_tests_inl.hpp b/modules/gapi/test/common/gapi_video_tests_inl.hpp index 965c06a328..627c6543db 100644 --- a/modules/gapi/test/common/gapi_video_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_video_tests_inl.hpp @@ -8,6 +8,7 @@ #define OPENCV_GAPI_VIDEO_TESTS_INL_HPP #include "gapi_video_tests.hpp" +#include namespace opencv_test { @@ -88,6 +89,49 @@ TEST_P(BuildPyr_CalcOptFlow_PipelineTest, AccuracyTest) compareOutputsOptFlow(outOCV, outGAPI); } +#ifdef HAVE_OPENCV_VIDEO +TEST_P(BackgroundSubtractorTest, AccuracyTest) +{ + initTestDataPath(); + + cv::gapi::video::BackgroundSubtractorType opType; + double thr = -1; + std::tie(opType, thr) = typeAndThreshold; + + cv::gapi::video::BackgroundSubtractorParams bsp(opType, histLength, thr, + detectShadows, learningRate); + + // G-API graph declaration + cv::GMat in; + cv::GMat out = cv::gapi::BackgroundSubtractor(in, bsp); + // Preserving 'in' in output to have possibility to compare with OpenCV reference + cv::GComputation c(cv::GIn(in), cv::GOut(cv::gapi::copy(in), out)); + + // G-API compilation of graph for streaming mode + auto gapiBackSub = c.compileStreaming(getCompileArgs()); + + // Testing G-API Background Substractor in streaming mode + auto path = findDataFile("cv/video/768x576.avi"); + try + { + gapiBackSub.setSource(gapi::wip::make_src(path)); + } + catch (...) 
+ { throw SkipTestException("Video file can't be opened."); } + + cv::Ptr pOCVBackSub; + + if (opType == cv::gapi::video::TYPE_BS_MOG2) + pOCVBackSub = cv::createBackgroundSubtractorMOG2(histLength, thr, + detectShadows); + else if (opType == cv::gapi::video::TYPE_BS_KNN) + pOCVBackSub = cv::createBackgroundSubtractorKNN(histLength, thr, + detectShadows); + + // Allowing 1% difference of all pixels between G-API and reference OpenCV results + testBackgroundSubtractorStreaming(gapiBackSub, pOCVBackSub, 1, 1, learningRate, testNumFrames); +} +#endif } // opencv_test #endif // OPENCV_GAPI_VIDEO_TESTS_INL_HPP diff --git a/modules/gapi/test/cpu/gapi_video_tests_cpu.cpp b/modules/gapi/test/cpu/gapi_video_tests_cpu.cpp index c4659adf4c..c84b904072 100644 --- a/modules/gapi/test/cpu/gapi_video_tests_cpu.cpp +++ b/modules/gapi/test/cpu/gapi_video_tests_cpu.cpp @@ -97,4 +97,18 @@ INSTANTIATE_TEST_CASE_MACRO_P(WITH_VIDEO(BuildPyr_CalcOptFlow_PipelineInternalTe Values(15), Values(3), Values(true))); + + +INSTANTIATE_TEST_CASE_MACRO_P(WITH_VIDEO(BackgroundSubtractorTestCPU), + BackgroundSubtractorTest, + Combine(Values(VIDEO_CPU), + Values(std::make_tuple(cv::gapi::video::TYPE_BS_MOG2, 16), + std::make_tuple(cv::gapi::video::TYPE_BS_MOG2, 8), + std::make_tuple(cv::gapi::video::TYPE_BS_KNN, 400), + std::make_tuple(cv::gapi::video::TYPE_BS_KNN, 200)), + Values(500, 50), + Values(true, false), + Values(-1, 0, 0.5, 1), + Values("cv/video/768x576.avi"), + Values(3))); } // opencv_test From 69e1167882a8145bfcc2ba3a89fcdc23ee8b7913 Mon Sep 17 00:00:00 2001 From: Jonathan Cole Date: Mon, 30 Nov 2020 14:10:37 -0500 Subject: [PATCH 012/422] Delete xcframework if it already exists before building a new one --- platforms/apple/build_xcframework.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/platforms/apple/build_xcframework.py b/platforms/apple/build_xcframework.py index e8a77d230f..712bfdc629 100755 --- a/platforms/apple/build_xcframework.py 
+++ b/platforms/apple/build_xcframework.py @@ -4,7 +4,7 @@ This script builds OpenCV into an xcframework compatible with the platforms of your choice. Just run it and grab a snack; you'll be waiting a while. """ -import sys, os, argparse, pathlib, traceback +import sys, os, argparse, pathlib, traceback, contextlib, shutil from cv_build_utils import execute, print_error, print_header, get_xcode_version, get_cmake_version if __name__ == "__main__": @@ -67,7 +67,7 @@ if __name__ == "__main__": # Build phase try: - # Build .frameworks for each platform + # Phase 1: build .frameworks for each platform osx_script_path = os.path.abspath(os.path.abspath(os.path.dirname(__file__))+'/../osx/build_framework.py') ios_script_path = os.path.abspath(os.path.abspath(os.path.dirname(__file__))+'/../ios/build_framework.py') @@ -104,22 +104,29 @@ if __name__ == "__main__": print_header("Building Catalyst frameworks") execute(command, cwd=os.getcwd()) - # Put all the built .frameworks together into a .xcframework - print_header("Building xcframework") + # Phase 2: put all the built .frameworks together into a .xcframework + + xcframework_path = "{}/{}.xcframework".format(args.out, args.framework_name) + print_header("Building {}".format(xcframework_path)) + + # Remove the xcframework if it exists, otherwise the existing + # file will cause the xcodebuild command to fail. 
+ with contextlib.suppress(FileNotFoundError): + shutil.rmtree(xcframework_path) + print("Removed existing xcframework at {}".format(xcframework_path)) - framework_path = "{}/{}.xcframework".format(args.out, args.framework_name) xcframework_build_command = [ "xcodebuild", "-create-xcframework", "-output", - framework_path, + xcframework_path, ] for folder in build_folders: - xcframework_build_command += ["-framework", "{}/{}.framework".format(folder, args.framework_name)] + xcframework_build_command += ["-framework", xcframework_path] execute(xcframework_build_command, cwd=os.getcwd()) print("") - print_header("Finished building {}".format(framework_path)) + print_header("Finished building {}".format(xcframework_path)) except Exception as e: print_error(e) traceback.print_exc(file=sys.stderr) From 3c40f87af38ef520b08ace8e31bd12f0c1511bdc Mon Sep 17 00:00:00 2001 From: Chris Ballinger Date: Mon, 30 Nov 2020 17:34:34 -0800 Subject: [PATCH 013/422] Bump default IPHONEOS_DEPLOYMENT_TARGET to 9.0 --- platforms/ios/run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platforms/ios/run_tests.py b/platforms/ios/run_tests.py index de302c2bfe..12603b2226 100755 --- a/platforms/ios/run_tests.py +++ b/platforms/ios/run_tests.py @@ -7,7 +7,7 @@ from __future__ import print_function import glob, re, os, os.path, shutil, string, sys, argparse, traceback, multiprocessing from subprocess import check_call, check_output, CalledProcessError -IPHONEOS_DEPLOYMENT_TARGET='8.0' # default, can be changed via command line options or environment variable +IPHONEOS_DEPLOYMENT_TARGET='9.0' # default, can be changed via command line options or environment variable def execute(cmd, cwd = None): print("Executing: %s in %s" % (cmd, cwd), file=sys.stderr) From 446f3448180075cfbb8227644352ad9c3e573caf Mon Sep 17 00:00:00 2001 From: Anton Potapov Date: Tue, 1 Dec 2020 09:34:53 +0300 Subject: [PATCH 014/422] GAPI: fix C++17 compilation errors in GNetPackage (fixes #17385) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - explicitly declared default constructor - made initilizer_list  constructor to accept the list by copy    -- as it is  more canonical (and as copying the initializer_list does not force copy of the list items)    -- current version anyway does not do what it is intended to --- modules/gapi/include/opencv2/gapi/infer.hpp | 4 ++-- modules/gapi/src/api/ginfer.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/infer.hpp b/modules/gapi/include/opencv2/gapi/infer.hpp index b850775a62..f20f638017 100644 --- a/modules/gapi/include/opencv2/gapi/infer.hpp +++ b/modules/gapi/include/opencv2/gapi/infer.hpp @@ -418,8 +418,8 @@ struct GAPI_EXPORTS GNetParam { * @sa cv::gapi::networks */ struct GAPI_EXPORTS_W_SIMPLE GNetPackage { - GAPI_WRAP GNetPackage() : GNetPackage({}) {} - explicit GNetPackage(std::initializer_list &&ii); + GAPI_WRAP GNetPackage() = default; + explicit GNetPackage(std::initializer_list ii); std::vector backends() const; std::vector networks; }; diff --git a/modules/gapi/src/api/ginfer.cpp b/modules/gapi/src/api/ginfer.cpp index 156f8938c4..f4bd1c3abb 100644 --- a/modules/gapi/src/api/ginfer.cpp +++ b/modules/gapi/src/api/ginfer.cpp @@ -16,8 +16,8 @@ #include -cv::gapi::GNetPackage::GNetPackage(std::initializer_list &&ii) - : networks(std::move(ii)) { +cv::gapi::GNetPackage::GNetPackage(std::initializer_list ii) + : networks(ii) { } std::vector cv::gapi::GNetPackage::backends() const { From eb6d8e6af233e0c021bfae4e04f36b2b844157a8 Mon Sep 17 00:00:00 2001 From: Anton Potapov Date: Tue, 1 Dec 2020 11:12:36 +0300 Subject: [PATCH 015/422] TBB executor for GAPI: fix race consition in Async test The test has race condition, which is addressed by the patch. 
The race is next: Master thread is calling execute (effectively blocked, waiting for callback to be called) "Async" thread picks up the callback Call the callback Then sets the variables in test After call back is called, master thread is unblocked and may check the variables (set in point 4 by the "async" thread) earlier then they actually changed Changes: callback should be called as the last step (after flag variables are set), as it effectively unblock the master thread fixes #18974 --- modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp b/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp index d793683f94..bdc3bb2360 100644 --- a/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp +++ b/modules/gapi/test/executor/gtbbexecutor_internal_tests.cpp @@ -95,9 +95,9 @@ TEST(TBBExecutor, AsyncBasic) { if (!slept) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } - callback(); callback_called = true; master_was_blocked_until_callback_called = (master_is_waiting == true); + callback(); }); auto async_task_body = [&](std::function&& cb, size_t /*total_order_index*/) { From 9cef41000abc4733b86954050e9f7989ae5af122 Mon Sep 17 00:00:00 2001 From: Sergei Slashchinin <62052793+sl-sergei@users.noreply.github.com> Date: Tue, 1 Dec 2020 15:52:09 +0300 Subject: [PATCH 016/422] Merge pull request #18973 from sl-sergei:fix_vulkan_build * Fix build when HAVE_VULKAN is ON * Fix warnings --- modules/dnn/src/layers/pooling_layer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 621315a572..465ce0bed5 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -440,9 +440,9 @@ public: { int padding_mode; vkcom::PoolType pool_type; - int filter_size[2] = {kernel.height, 
kernel.width}; - int pad_size[2] = {pad.height, pad.width}; - int stride_size[2] = {stride.height, stride.width}; + int filter_size[2] = {static_cast(kernel_size[0]), static_cast(kernel_size[1])}; + int pad_size[2] = {static_cast(pads_begin[0]), static_cast(pads_begin[1])}; + int stride_size[2] = {static_cast(strides[0]), static_cast(strides[1])}; pool_type = type == MAX ? vkcom::kPoolTypeMax: (type == AVE ? vkcom::kPoolTypeAvg: vkcom::kPoolTypeNum); From 9d37cdaa6605d4b1512666fde7737e2bd80d839d Mon Sep 17 00:00:00 2001 From: Daniel Cauchi <33454325+CowKeyMan@users.noreply.github.com> Date: Tue, 1 Dec 2020 14:50:24 +0100 Subject: [PATCH 017/422] Merge pull request #18891 from CowKeyMan:NMS_boxes_with_different_labels Add option for NMS for boxes with different labels * DetectionModel impl * Add option for NMS for boxes with different labels In the detect function in modules/dnn/include/opencv2/dnn/dnn.hpp, whose implementation can be found at modules/dnn/src/model.cpp, the Non Max Suppression (NMS) is applied only for objects of the same label. Thus, a flag was added with the purpose to allow developers to choose if they want to keep the default implementation or wether they would like NMS to be applied to all the boxes, regardless of label. 
The flag is called nmsDifferentLabels, and is given a default value of false, which applies the current default implementation, thus allowing existing projects to update opencv without disruption Solves issue opencv#18832 * Change return type of set & Add default constr * Add assertions due to default constructor --- modules/dnn/include/opencv2/dnn/dnn.hpp | 17 ++++ modules/dnn/src/model.cpp | 126 +++++++++++++++++------- modules/dnn/test/test_model.cpp | 57 ++++++++++- 3 files changed, 165 insertions(+), 35 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 69b71f90ce..5467c989ac 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1296,6 +1296,23 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP DetectionModel(const Net& network); + CV_DEPRECATED_EXTERNAL // avoid using in C++ code (need to fix bindings first) + DetectionModel(); + + /** + * @brief nmsAcrossClasses defaults to false, + * such that when non max suppression is used during the detect() function, it will do so per-class. + * This function allows you to toggle this behaviour. + * @param[in] value The new value for nmsAcrossClasses + */ + CV_WRAP DetectionModel& setNmsAcrossClasses(bool value); + + /** + * @brief Getter for nmsAcrossClasses. This variable defaults to false, + * such that when non max suppression is used during the detect() function, it will do so only per-class + */ + CV_WRAP bool getNmsAcrossClasses(); + /** @brief Given the @p input frame, create input blob, run net and return result detections. * @param[in] frame The input image. * @param[out] classIds Class indexes in result detection. 
diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index aefeaa42b3..16f7d31a25 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -320,34 +320,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask) } } -void disableRegionNMS(Net& net) +class DetectionModel_Impl : public Model::Impl { - for (String& name : net.getUnconnectedOutLayersNames()) +public: + virtual ~DetectionModel_Impl() {} + DetectionModel_Impl() : Impl() {} + DetectionModel_Impl(const DetectionModel_Impl&) = delete; + DetectionModel_Impl(DetectionModel_Impl&&) = delete; + + void disableRegionNMS(Net& net) { - int layerId = net.getLayerId(name); - Ptr layer = net.getLayer(layerId).dynamicCast(); - if (!layer.empty()) + for (String& name : net.getUnconnectedOutLayersNames()) { - layer->nmsThreshold = 0; + int layerId = net.getLayerId(name); + Ptr layer = net.getLayer(layerId).dynamicCast(); + if (!layer.empty()) + { + layer->nmsThreshold = 0; + } } } -} + + void setNmsAcrossClasses(bool value) { + nmsAcrossClasses = value; + } + + bool getNmsAcrossClasses() { + return nmsAcrossClasses; + } + +private: + bool nmsAcrossClasses = false; +}; DetectionModel::DetectionModel(const String& model, const String& config) - : Model(model, config) + : DetectionModel(readNet(model, config)) { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + // nothing } -DetectionModel::DetectionModel(const Net& network) : Model(network) +DetectionModel::DetectionModel(const Net& network) : Model() { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + impl = makePtr(); + impl->initNet(network); + impl.dynamicCast()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() +} + +DetectionModel::DetectionModel() : Model() +{ + // nothing +} + +DetectionModel& DetectionModel::setNmsAcrossClasses(bool value) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove 
once default constructor is removed + + impl.dynamicCast()->setNmsAcrossClasses(value); + return *this; +} + +bool DetectionModel::getNmsAcrossClasses() +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + return impl.dynamicCast()->getNmsAcrossClasses(); } void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, CV_OUT std::vector& confidences, CV_OUT std::vector& boxes, float confThreshold, float nmsThreshold) { + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + std::vector detections; impl->processFrame(frame, detections); @@ -413,7 +457,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { std::vector predClassIds; std::vector predBoxes; - std::vector predConf; + std::vector predConfidences; for (int i = 0; i < detections.size(); ++i) { // Network produces output blob with a shape NxC where N is a number of @@ -442,37 +486,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, height = std::max(1, std::min(height, frameHeight - top)); predClassIds.push_back(classIdPoint.x); - predConf.push_back(static_cast(conf)); + predConfidences.push_back(static_cast(conf)); predBoxes.emplace_back(left, top, width, height); } } if (nmsThreshold) { - std::map > class2indices; - for (size_t i = 0; i < predClassIds.size(); i++) + if (getNmsAcrossClasses()) { - if (predConf[i] >= confThreshold) - { - class2indices[predClassIds[i]].push_back(i); - } - } - for (const auto& it : class2indices) - { - std::vector localBoxes; - std::vector localConfidences; - for (size_t idx : it.second) - { - localBoxes.push_back(predBoxes[idx]); - localConfidences.push_back(predConf[idx]); - } std::vector indices; - NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); - classIds.resize(classIds.size() + indices.size(), it.first); + NMSBoxes(predBoxes, predConfidences, 
confThreshold, nmsThreshold, indices); for (int idx : indices) { - boxes.push_back(localBoxes[idx]); - confidences.push_back(localConfidences[idx]); + boxes.push_back(predBoxes[idx]); + confidences.push_back(predConfidences[idx]); + classIds.push_back(predClassIds[idx]); + } + } + else + { + std::map > class2indices; + for (size_t i = 0; i < predClassIds.size(); i++) + { + if (predConfidences[i] >= confThreshold) + { + class2indices[predClassIds[i]].push_back(i); + } + } + for (const auto& it : class2indices) + { + std::vector localBoxes; + std::vector localConfidences; + for (size_t idx : it.second) + { + localBoxes.push_back(predBoxes[idx]); + localConfidences.push_back(predConfidences[idx]); + } + std::vector indices; + NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); + classIds.resize(classIds.size() + indices.size(), it.first); + for (int idx : indices) + { + boxes.push_back(localBoxes[idx]); + confidences.push_back(localConfidences[idx]); + } } } } @@ -480,7 +538,7 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { boxes = std::move(predBoxes); classIds = std::move(predClassIds); - confidences = std::move(predConf); + confidences = std::move(predConfidences); } } else diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 7d516de73e..58a881488a 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -25,7 +25,8 @@ public: double scoreDiff, double iouDiff, double confThreshold = 0.24, double nmsThreshold = 0.0, const Size& size = {-1, -1}, Scalar mean = Scalar(), - double scale = 1.0, bool swapRB = false, bool crop = false) + double scale = 1.0, bool swapRB = false, bool crop = false, + bool nmsAcrossClasses = false) { checkBackend(); @@ -38,6 +39,8 @@ public: model.setPreferableBackend(backend); model.setPreferableTarget(target); + model.setNmsAcrossClasses(nmsAcrossClasses); + std::vector classIds; std::vector confidences; std::vector boxes; 
@@ -177,6 +180,58 @@ TEST_P(Test_Model, DetectRegion) Scalar(), scale, swapRB); } +TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) +{ + applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#endif + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + std::vector refClassIds = { 6, 11 }; + std::vector refConfidences = { 0.750469f, 0.901615f }; + std::vector refBoxes = { Rect2d(240, 53, 135, 72), + Rect2d(58, 141, 117, 249) }; + + std::string img_path = _tf("dog416.png"); + std::string weights_file = _tf("yolo-voc.weights", false); + std::string config_file = _tf("yolo-voc.cfg"); + + double scale = 1.0 / 255.0; + Size size{ 416, 416 }; + bool swapRB = true; + bool crop = false; + bool nmsAcrossClasses = true; + + double confThreshold = 0.24; + double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 
0.15: 0.15; + double scoreDiff = 8e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 1e-2; + iouDiff = 1.6e-2; + } + + testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, + refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, + Scalar(), scale, swapRB, crop, + nmsAcrossClasses); +} + TEST_P(Test_Model, DetectionOutput) { #if defined(INF_ENGINE_RELEASE) From 91ce6ef190bd90d96fce50e3a26e870f50613d1e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 1 Dec 2020 14:01:42 +0000 Subject: [PATCH 018/422] core(ipp): disable SSE4.2 code path in countNonZero() --- modules/core/src/count_non_zero.dispatch.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/core/src/count_non_zero.dispatch.cpp b/modules/core/src/count_non_zero.dispatch.cpp index 96b80c0d8c..cd3ed84350 100644 --- a/modules/core/src/count_non_zero.dispatch.cpp +++ b/modules/core/src/count_non_zero.dispatch.cpp @@ -62,11 +62,9 @@ static bool ipp_countNonZero( Mat &src, int &res ) { CV_INSTRUMENT_REGION_IPP(); -#if defined __APPLE__ || (defined _MSC_VER && defined _M_IX86) // see https://github.com/opencv/opencv/issues/17453 - if (src.dims <= 2 && src.step > 520000) + if (src.dims <= 2 && src.step > 520000 && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) return false; -#endif #if IPP_VERSION_X100 < 201801 // Poor performance of SSE42 From 29b453eb86513a2af4eb4bff4b988addc8b49729 Mon Sep 17 00:00:00 2001 From: Giles Payne Date: Wed, 18 Nov 2020 22:06:59 +0900 Subject: [PATCH 019/422] Objective-C name clash avoidance --- modules/objc/generator/gen_objc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/objc/generator/gen_objc.py b/modules/objc/generator/gen_objc.py index bd9743c757..61d5e140ab 100755 --- a/modules/objc/generator/gen_objc.py +++ b/modules/objc/generator/gen_objc.py @@ -570,7 +570,7 @@ def 
get_swift_type(ctype): return swift_type def build_swift_extension_decl(name, args, constructor, static, ret_type): - extension_decl = ("class " if static else "") + (("func " + name) if not constructor else "convenience init") + "(" + extension_decl = "@nonobjc " + ("class " if static else "") + (("func " + name) if not constructor else "convenience init") + "(" swift_args = [] for a in args: if a.ctype not in type_dict: From 38a4eaf8a32b4bc5a60db739eb6ac55819010f29 Mon Sep 17 00:00:00 2001 From: Igor Murzov Date: Mon, 23 Nov 2020 14:05:55 +0300 Subject: [PATCH 020/422] Orbbec tutorial: Sync frames from two streams and process depth & color simultaneously --- .../orbbec-astra/orbbec_astra.markdown | 45 ++++++---- .../videoio/orbbec_astra/orbbec_astra.cpp | 87 ++++++++++--------- 2 files changed, 76 insertions(+), 56 deletions(-) diff --git a/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown b/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown index 664e4f6dfe..8c5ebcd802 100644 --- a/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown +++ b/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown @@ -9,8 +9,8 @@ Using Orbbec Astra 3D cameras {#tutorial_orbbec_astra} This tutorial is devoted to the Astra Series of Orbbec 3D cameras (https://orbbec3d.com/product-astra-pro/). That cameras have a depth sensor in addition to a common color sensor. The depth sensors can be read using -the OpenNI interface with @ref cv::VideoCapture class. The video stream is provided through the regular camera -interface. +the open source OpenNI API with @ref cv::VideoCapture class. The video stream is provided through the regular +camera interface. ### Installation Instructions @@ -70,15 +70,20 @@ In order to use a depth sensor with OpenCV you should do the following steps: ### Code -To get both depth and color frames, two @ref cv::VideoCapture objects should be created: +The Astra Pro camera has two sensors -- a depth sensor and a color sensor. 
The depth sensors +can be read using the OpenNI interface with @ref cv::VideoCapture class. The video stream is +not available through OpenNI API and is only provided through the regular camera interface. +So, to get both depth and color frames, two @ref cv::VideoCapture objects should be created: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Open streams -The first object will use the regular Video4Linux2 interface to access the color sensor. The second one +The first object will use the Video4Linux2 interface to access the color sensor. The second one is using OpenNI2 API to retrieve depth data. -Before using the created VideoCapture objects you may want to setup stream parameters by setting -objects' properties. The most important parameters are frame width, frame height and fps: +Before using the created VideoCapture objects you may want to set up stream parameters by setting +objects' properties. The most important parameters are frame width, frame height and fps. +For this example, we’ll configure width and height of both streams to VGA resolution as that’s +the maximum resolution available for both sensors and we’d like both stream parameters to be the same: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Setup streams @@ -113,8 +118,9 @@ After the VideoCapture objects are set up you can start reading frames from them to avoid one stream blocking while another stream is being read. VideoCapture is not a thread-safe class, so you need to be careful to avoid any possible deadlocks or data races. -Example implementation that gets frames from each sensor in a new thread and stores them -in a list along with their timestamps: +As there are two video sources that should be read simultaneously, it’s necessary to create two +threads to avoid blocking. 
Example implementation that gets frames from each sensor in a new thread +and stores them in a list along with their timestamps: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Read streams @@ -130,17 +136,24 @@ VideoCapture can retrieve the following data: -# data given from the color sensor is a regular BGR image (CV_8UC3). -When new data is available a reading thread notifies the main thread. A frame is stored in the -ordered list -- the first frame is the latest one: +When new data are available a reading thread notifies the main thread using a condition variable. +A frame is stored in the ordered list -- the first frame is the latest one. As depth and color frames +are read from independent sources two video streams may become out of sync even when both streams +are set up for the same frame rate. A post-synchronization procedure can be applied to the streams +to combine depth and color frames into pairs. The sample code below demonstrates this procedure: -@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Show color frame +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Pair frames -Depth frames can be picked the same way from the `depthFrames` list. +In the code snippet above the execution is blocked until there are some frames in both frame lists. +When there are new frames, their timestamps are being checked -- if they differ more than a half of +the frame period then one of the frames is dropped. If timestamps are close enough, then two frames +are paired. Now, we have two frames: one containing color information and another one -- depth information. +In the example above retrieved frames are simply shown with cv::imshow function, but you can insert +any other processing code here. -After that, you'll have two frames: one containing color information and another one -- depth -information. 
In the sample images below you can see the color frame and the depth frame showing -the same scene. Looking at the color frame it's hard to distinguish plant leaves from leaves painted -on a wall, but the depth data makes it easy. +In the sample images below you can see the color frame and the depth frame representing the same scene. +Looking at the color frame it's hard to distinguish plant leaves from leaves painted on a wall, +but the depth data makes it easy. ![Color frame](images/astra_color.jpg) ![Depth frame](images/astra_depth.png) diff --git a/samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp b/samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp index a6dc6dd75c..bd626d5e32 100644 --- a/samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp +++ b/samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp @@ -69,7 +69,6 @@ int main() //! [Read streams] // Create two lists to store frames std::list depthFrames, colorFrames; - std::mutex depthFramesMtx, colorFramesMtx; const std::size_t maxFrames = 64; // Synchronization objects @@ -90,8 +89,6 @@ int main() Frame f; f.timestamp = cv::getTickCount(); depthStream.retrieve(f.frame, CAP_OPENNI_DEPTH_MAP); - //depthStream.retrieve(f.frame, CAP_OPENNI_DISPARITY_MAP); - //depthStream.retrieve(f.frame, CAP_OPENNI_IR_IMAGE); if (f.frame.empty()) { cerr << "ERROR: Failed to decode frame from depth stream" << endl; @@ -99,7 +96,7 @@ int main() } { - std::lock_guard lk(depthFramesMtx); + std::lock_guard lk(mtx); if (depthFrames.size() >= maxFrames) depthFrames.pop_front(); depthFrames.push_back(f); @@ -127,7 +124,7 @@ int main() } { - std::lock_guard lk(colorFramesMtx); + std::lock_guard lk(mtx); if (colorFrames.size() >= maxFrames) colorFrames.pop_front(); colorFrames.push_back(f); @@ -138,56 +135,66 @@ int main() }); //! [Read streams] - while (true) + //! 
[Pair frames] + // Pair depth and color frames + while (!isFinish) { std::unique_lock lk(mtx); - while (depthFrames.empty() && colorFrames.empty()) + while (!isFinish && (depthFrames.empty() || colorFrames.empty())) dataReady.wait(lk); - depthFramesMtx.lock(); - if (depthFrames.empty()) + while (!depthFrames.empty() && !colorFrames.empty()) { - depthFramesMtx.unlock(); - } - else - { - // Get a frame from the list - Mat depthMap = depthFrames.front().frame; - depthFrames.pop_front(); - depthFramesMtx.unlock(); + if (!lk.owns_lock()) + lk.lock(); + // Get a frame from the list + Frame depthFrame = depthFrames.front(); + int64 depthT = depthFrame.timestamp; + + // Get a frame from the list + Frame colorFrame = colorFrames.front(); + int64 colorT = colorFrame.timestamp; + + // Half of frame period is a maximum time diff between frames + const int64 maxTdiff = int64(1000000000 / (2 * colorStream.get(CAP_PROP_FPS))); + if (depthT + maxTdiff < colorT) + { + depthFrames.pop_front(); + continue; + } + else if (colorT + maxTdiff < depthT) + { + colorFrames.pop_front(); + continue; + } + depthFrames.pop_front(); + colorFrames.pop_front(); + lk.unlock(); + + //! [Show frames] // Show depth frame Mat d8, dColor; - depthMap.convertTo(d8, CV_8U, 255.0 / 2500); + depthFrame.frame.convertTo(d8, CV_8U, 255.0 / 2500); applyColorMap(d8, dColor, COLORMAP_OCEAN); imshow("Depth (colored)", dColor); - } - - //! [Show color frame] - colorFramesMtx.lock(); - if (colorFrames.empty()) - { - colorFramesMtx.unlock(); - } - else - { - // Get a frame from the list - Mat colorFrame = colorFrames.front().frame; - colorFrames.pop_front(); - colorFramesMtx.unlock(); // Show color frame - imshow("Color", colorFrame); + imshow("Color", colorFrame.frame); + //! [Show frames] + + // Exit on Esc key press + int key = waitKey(1); + if (key == 27) // ESC + { + isFinish = true; + break; + } } - //! 
[Show color frame] - - // Exit on Esc key press - int key = waitKey(1); - if (key == 27) // ESC - break; } + //! [Pair frames] - isFinish = true; + dataReady.notify_one(); depthReader.join(); colorReader.join(); From 673e4e20f02881aff08534d17acfb6370a31363a Mon Sep 17 00:00:00 2001 From: Zhangyin Date: Mon, 24 Aug 2020 12:10:42 +0800 Subject: [PATCH 021/422] Added RISC-V backend of universal intrinsics --- .../include/opencv2/core/cv_cpu_dispatch.h | 1 + .../core/include/opencv2/core/hal/intrin.hpp | 2 +- .../include/opencv2/core/hal/intrin_rvv.hpp | 4265 +++++++++++------ platforms/linux/riscv64-gcc.toolchain.cmake | 20 + 4 files changed, 2765 insertions(+), 1523 deletions(-) create mode 100644 platforms/linux/riscv64-gcc.toolchain.cmake diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index eb3f8693c2..1827dbc7c6 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -170,6 +170,7 @@ #if defined CV_CPU_COMPILE_RVV # define CV_RVV 1 +# include #endif #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 0ffcb49cea..753cd21941 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -202,7 +202,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; # undef CV_RVV #endif -#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP) +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP) #define CV__SIMD_FORWARD 128 #include "opencv2/core/hal/intrin_forward.hpp" #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index eca787c7fd..cf2d066a1e 100644 --- 
a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -5,306 +5,2310 @@ #ifndef OPENCV_HAL_INTRIN_RVV_HPP #define OPENCV_HAL_INTRIN_RVV_HPP -#include -#include #include -#include "opencv2/core/saturate.hpp" - -#define CV_SIMD128_CPP 1 -#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN) -#define CV_SIMD128 1 -#define CV_SIMD128_64F 1 -#endif namespace cv { -#ifndef CV_DOXYGEN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN -#endif +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 -template struct v_reg +//////////// Unsupported native intrinsics in C++ //////////// + +struct vuint8mf2_t { - typedef _Tp lane_type; - enum { nlanes = n }; - - explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } - - v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7) + uchar val[8] = {0}; + vuint8mf2_t() {} + vuint8mf2_t(const uchar* ptr) { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7, - _Tp s8, _Tp s9, _Tp s10, _Tp s11, - _Tp s12, _Tp s13, _Tp s14, _Tp s15) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; - s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; - } - - v_reg() {} - - v_reg(const v_reg<_Tp, n> & r) - { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - } - _Tp get0() const { return s[0]; } - - _Tp get(const int i) const { return s[i]; } - v_reg<_Tp, n> high() const - { - v_reg<_Tp, n> c; - int i; - for( i = 0; i < n/2; i++ ) + for (int i = 0; i < 8; ++i) { - c.s[i] = s[i+(n/2)]; - c.s[i+(n/2)] = 0; + val[i] = ptr[i]; } - return c; } - - static v_reg<_Tp, n> zero() +}; +struct vint8mf2_t +{ + schar val[8] = {0}; + 
vint8mf2_t() {} + vint8mf2_t(const schar* ptr) { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = (_Tp)0; - return c; + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } } - - static v_reg<_Tp, n> all(_Tp s) +}; +struct vuint16mf2_t +{ + ushort val[4] = {0}; + vuint16mf2_t() {} + vuint16mf2_t(const ushort* ptr) { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = s; - return c; + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - template v_reg<_Tp2, n2> reinterpret_as() const +}; +struct vint16mf2_t +{ + short val[4] = {0}; + vint16mf2_t() {} + vint16mf2_t(const short* ptr) { - size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); - v_reg<_Tp2, n2> c; - std::memcpy(&c.s[0], &s[0], bytes); - return c; + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - v_reg& operator=(const v_reg<_Tp, n> & r) +}; +struct vuint32mf2_t +{ + unsigned val[2] = {0}; + vuint32mf2_t() {} + vuint32mf2_t(const unsigned* ptr) { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - return *this; + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vint32mf2_t +{ + int val[2] = {0}; + vint32mf2_t() {} + vint32mf2_t(const int* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vfloat32mf2_t +{ + float val[2] = {0}; + vfloat32mf2_t() {} + vfloat32mf2_t(const float* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vuint64mf2_t +{ + uint64 val[1] = {0}; + vuint64mf2_t() {} + vuint64mf2_t(const uint64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vint64mf2_t +{ + int64 val[1] = {0}; + vint64mf2_t() {} + vint64mf2_t(const int64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vfloat64mf2_t +{ + double val[1] = {0}; + vfloat64mf2_t() {} + vfloat64mf2_t(const double* ptr) + { + val[0] = ptr[0]; + } +}; +struct vuint8mf4_t +{ + uchar val[4] = {0}; + vuint8mf4_t() {} + vuint8mf4_t(const uchar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf4_t +{ + schar val[4] = {0}; + vint8mf4_t() {} + 
vint8mf4_t(const schar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - _Tp s[n]; }; -typedef v_reg v_uint8x16; -typedef v_reg v_int8x16; -typedef v_reg v_uint16x8; -typedef v_reg v_int16x8; -typedef v_reg v_uint32x4; -typedef v_reg v_int32x4; -typedef v_reg v_float32x4; -typedef v_reg v_float64x2; -typedef v_reg v_uint64x2; -typedef v_reg v_int64x2; - -template CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - - -template CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a); - - -#ifndef CV_DOXYGEN - -#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) 
\ -__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(short, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(int, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \ - -#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \ -__CV_EXPAND(macro_name(float, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(double, __VA_ARGS__)) \ - -#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \ -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \ -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \ - -#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \ -template inline \ -v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \ +inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr) \ { \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return c; \ + return _Tpvec(ptr); \ } \ -template inline \ -v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v) \ { \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return a; \ + for (int i = 0; i < n; ++i) \ + { \ + ptr[i] = v.val[i]; \ + } \ } -#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint8mf2_t, uint8_t, u8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint8mf2_t, int8_t, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint16mf2_t, uint16_t, u16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint16mf2_t, int16_t, i16, 16, 4) 
+OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint32mf2_t, uint32_t, u32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint32mf2_t, int32_t, i32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat32mf2_t, float32_t, f32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint64mf2_t, uint64_t, u64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint64mf2_t, int64_t, i64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1) -CV__HAL_INTRIN_IMPL_BIN_OP(+) -CV__HAL_INTRIN_IMPL_BIN_OP(-) -CV__HAL_INTRIN_IMPL_BIN_OP(*) -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /) -#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \ -template CV_INLINE \ -v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \ +inline _Tpwvec wcvt (_Tpvec v) \ { \ - v_reg<_Tp, n> c; \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return c; \ -} \ -template CV_INLINE \ -v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return a; \ + _wTp tmp[n]; \ + for (int i = 0; i < n; ++i) \ + { \ + tmp[i] = (_wTp)v.val[i]; \ + } \ + vsetvlmax_e##width##m1(); \ + return vle##width##_v_##suffix##m1(tmp); \ } -#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \ -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \ -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */ 
+OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint16m1_t, vint8mf2_t, short, vwcvt_x_x_v_i16m1, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint32m1_t, vuint16mf2_t, unsigned, vwcvtu_x_x_v_u32m1, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2) +inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base) +{ + return vuint8mf4_t(base); +} +inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base) +{ + return vint8mf4_t(base); +} -CV__HAL_INTRIN_IMPL_BIT_OP(&) -CV__HAL_INTRIN_IMPL_BIT_OP(|) -CV__HAL_INTRIN_IMPL_BIT_OP(^) +inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src) +{ + ushort tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (ushort)src.val[i]; + } + return vle16_v_u16mf2(tmp); +} +inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src) +{ + short tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (short)src.val[i]; + } + return vle16_v_i16mf2(tmp); +} -#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \ -template CV_INLINE \ -v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \ - return c; \ -} \ +//////////// Types //////////// -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~) +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_u8m1(val, v); + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + 
uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint8m1_t() const + { + vsetvlmax_e8m1(); + return vle8_v_u8m1(val); + } + uchar get0() const + { + return val[0]; + } + + uchar val[16]; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_i8m1(val, v); + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint8m1_t() const + { + vsetvlmax_e8m1(); + return vle8_v_i8m1(val); + } + schar get0() const + { + return val[0]; + } + + schar val[16]; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_u16m1(val, v); + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint16m1_t() const + { + vsetvlmax_e16m1(); + return vle16_v_u16m1(val); + } + ushort get0() const + { + return val[0]; + } + + ushort val[8]; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_i16m1(val, v); + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint16m1_t() const + { + vsetvlmax_e16m1(); + return 
vle16_v_i16m1(val); + } + short get0() const + { + return val[0]; + } + + short val[8]; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_u32m1(val, v); + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_u32m1(val); + } + unsigned get0() const + { + return val[0]; + } + + unsigned val[4]; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_i32m1(val, v); + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_i32m1(val); + } + int get0() const + { + return val[0]; + } + int val[4]; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_f32m1(val, v); + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_f32m1(val); + } + float get0() const + { + return val[0]; + } + float val[4]; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_u64m1(val, v); + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_u64m1(val); + } + uint64 get0() 
const + { + return val[0]; + } + + uint64 val[2]; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_i64m1(val, v); + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_i64m1(val); + } + int64 get0() const + { + return val[0]; + } + + int64 val[2]; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_f64m1(val, v); + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_f64m1(val); + } + double get0() const + { + return val[0]; + } + + double val[2]; +}; #endif -#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ -template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp2, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ -} +//////////// Initial //////////// -#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \ -inline v_reg func(const v_reg& a) \ +#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, width, suffix1, suffix2) \ +inline v_##_Tpvec v_setzero_##suffix1() \ { \ - v_reg c; \ - for( int i = 0; i < 4; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vzero_##suffix2##m1()); \ } \ -inline v_reg func(const v_reg& a) \ +inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ { \ - v_reg c; \ - for( int i = 0; i < 2; i++ ) \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, 8, u8, u8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, 
8, s8, i8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, 16, u16, u16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, 16, s16, i16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, 32, u32, u32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, 32, s32, i32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, 64, u64, u64) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, 64, s64, i64) + +#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, width, suffix) \ +inline v_##_Tpv v_setzero_##suffix() \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vzero_##suffix##m1()); \ +} \ +inline v_##_Tpv v_setall_##suffix(_Tp v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, 64, f64) +#endif + +//////////// Reinterpret //////////// + +#define OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(_Tpvec, suffix) \ +inline v_##_Tpvec v_reinterpret_as_##suffix(const v_##_Tpvec& v) { return v; } + +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint8x16, u8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int8x16, s8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint16x8, u16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int16x8, s16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint32x4, u32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int32x4, s32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float32x4, f32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint64x2, u64) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \ +inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ +{ \ + vsetvlmax_e##width2##m1(); \ + return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val)); \ +} \ +inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const 
v_##_Tpvec1& v) \ +{ \ + vsetvlmax_e##width1##m1(); \ + return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val)); \ +} + +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, u32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, 
vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, s16, u8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, vint8m1_t, u16, s8, u16, i8, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32) 
+OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64) +#endif + +////////////// Extract ////////////// + +#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, suffix, width, vmv) \ +template \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, s), b, _Tpvec::nlanes - s)); \ +} \ +template inline _Tp v_extract_n(_Tpvec v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), v, i))); \ +} + + +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8x16, uchar, u8, 8, vmv_x_s_u8m1_u8) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8x16, schar, i8, 8, vmv_x_s_i8m1_i8) 
+OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16x8, ushort, u16, 16, vmv_x_s_u16m1_u16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16x8, short, i16, 16, vmv_x_s_i16m1_i16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32x4, uint, u32, 32, vmv_x_s_u32m1_u32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32x4, int, i32, 32, vmv_x_s_i32m1_i32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64x2, uint64, u64, 64, vmv_x_s_u64m1_u64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64x2, int64, i64, 64, vmv_x_s_i64m1_i64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32x4, float, f32, 32, vfmv_f_s_f32m1_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64x2, double, f64, 64, vfmv_f_s_f64m1_f64) +#endif + +////////////// Load/Store ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, width, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ \ + vsetvlmax_e8m1(); \ + return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr)); \ +} \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ +} \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + vsetvl_e##width##m1(hvl); \ + _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ + vsetvlmax_e##width##m1(); \ + return res; \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e8m1(); \ + vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val)); \ +} \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ 
+ for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ { \ - c.s[i] = cfunc(a.s[i]); \ - c.s[i + 2] = 0; \ + ptr[i] = tmp_ptr[i]; \ } \ - return c; \ -} - -OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) - -OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) - -OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, - typename V_TypeTraits<_Tp>::abs_type) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int) - -#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ -template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i], b.s[i]); \ - return c; \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ + for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ + { \ + ptr[i] = tmp_ptr[i+_Tpvec::nlanes/2]; \ + } \ } -#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ -template inline _Tp func(const v_reg<_Tp, n>& a) \ +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 8, u8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 8, i8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 16, u16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 16, i16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 32, u32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 32, i32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 64, u64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 64, i64) 
+OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 64, f64) +#endif + +inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], + ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v_reinterpret_as_u8(v_load_halves((schar*)ptr0, (schar*)ptr1)); } + +inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return v_reinterpret_as_u16(v_load_halves((short*)ptr0, (short*)ptr1)); } + +inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return v_reinterpret_as_u32(v_load_halves((int*)ptr0, (int*)ptr1)); } + +inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) +{ + int64 CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return 
v_reinterpret_as_u64(v_load_halves((int64*)ptr0, (int64*)ptr1)); } + +#if CV_SIMD128_64F +inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} +#endif + + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const 
uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return 
v_int32x4(vle32_v_i32m1(tab + idx[0])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + int64_t CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} + +inline 
v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_uint32x4(vle32_v_u32m1(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4] = {0}; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////////////// Pack boolean //////////////////// + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& 
b) +{ + ushort CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 8, b); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 4, b); + v_store(ptr + 8, c); + v_store(ptr + 12, d); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr), 0), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint64 CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 2, b); + v_store(ptr + 4, c); + v_store(ptr + 6, d); + v_store(ptr + 8, e); + v_store(ptr + 10, f); + v_store(ptr + 12, g); + v_store(ptr + 14, h); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr), 0), 0), 0)); +} + +////////////// Arithmetics ////////////// +#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, width) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ { \ - _Tp c = a.s[0]; \ - for( int i = 1; i < n; i++ ) \ - c = cfunc(c, a.s[i]); \ - return c; \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + a = _Tpvec(intrin(a, b)); \ + return a; \ } -OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 8) 
+OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 64) +#endif -OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, 
std::min) +////////////// Bitwise logic ////////////// -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) +#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, width) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vnot_v_##suffix##m1(a)); \ +} + +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 64) + +#define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(&, vand_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(|, vor_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a)))); +} + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + return 
v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(&, vand_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(|, vor_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + vsetvlmax_e64m1(); + return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a)))); +} +#endif + +////////////// Bitwise shifts ////////////// + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} + +#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + 
vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} + +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 64) + + +////////////// Comparison ////////////// + +#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, 
suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, width) + + +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return a == a; } + +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return a == a; } +#endif + +////////////// Min/Max ////////////// + +#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, width) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} + 
+OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 64) +#endif + +////////////// Arithmetics wrap ////////////// + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, 
vsub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 16) + +////////////// Reduce ////////////// + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, wwidth, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + vsetvlmax_e##wwidth##m1(); \ + _nwTpvec zero = vzero_##wsuffix##m1(); \ + _nwTpvec res = vzero_##wsuffix##m1(); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 32, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 32, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 64, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 64, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 32, fredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 64, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 64, redsum) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 64, fredsum) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, width, red) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + _Tpvec res = 
_Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a)); \ + return scalartype(res.get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 8, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 8, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 16, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 16, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 32, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 32, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 32, fredmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 8, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 8, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 16, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 16, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 32, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 32, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 32, fredmax) + + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + v_reduce_sum(a), + v_reduce_sum(b), + v_reduce_sum(c), + v_reduce_sum(d) + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +////////////// Square-Root ////////////// + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfsqrt_v_f32m1(x)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfsqrt_v_f64m1(x)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} +#endif + +inline v_float32x4 v_magnitude(const 
v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); +} +#endif + +////////////// Multiply-Add ////////////// + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(c, a, b)); +} +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + vsetvlmax_e32m1(); + return v_int32x4(vmacc_vv_i32m1(c, a, b)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + +inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(c, a, b)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +////////////// Check all/any ////////////// + +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, width) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a), shift)); \ + return (v.val[0] | v.val[1]) == 0; \ +} 
\ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift)); \ + return (v.val[0] | v.val[1]) != 0; \ +} + +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 32) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 64) + + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } + +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } + +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + +////////////// abs ////////////// + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \ +inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_max(a, b) - v_min(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8x16, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16x8, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32x4, absdiff) 
+OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32x4, absdiff) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) +#endif +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width) \ +inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b)), 0)); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 32) + +#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ +inline _Tprvec v_abs(const _Tpvec& a) \ +{ \ + return v_absdiff(a, v_setzero_##suffix()); \ +} + +OPENCV_HAL_IMPL_RVV_ABS(v_uint8x16, v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_ABS(v_uint16x8, v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_ABS(v_uint32x4, v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_ABS(v_float32x4, v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABS(v_float64x2, v_float64x2, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \ +inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_reduce_sum(v_absdiff(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32x4, float) + +////////////// Select ////////////// + +#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, width) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const 
_Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(merge(ne(mask, 0), b, a)); \ +} + +OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 64) +#endif + +////////////// Rotate shift ////////////// + +#define OPENCV_HAL_IMPL_RVV_ROTATE_OP(_Tpvec, suffix, width) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n), b, _Tpvec::nlanes - n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), b, _Tpvec::nlanes - n), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + + +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int8x16, i8, 8) 
+OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float32x4, f32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int64x2, i64, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float64x2, f64, 64) +#endif + +////////////// Convert to float ////////////// + +inline v_float32x4 v_cvt_f32(const v_int32x4& a) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfcvt_f_x_v_f32m1(a)); +} + +#if CV_SIMD128_64F +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + vsetvlmax_e32m1(); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); +} + +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + vsetvlmax_e32m1(); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); +} + +inline v_float64x2 v_cvt_f64(const v_int32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[0], ptr[1] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[2], ptr[3] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64(const v_float32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[0], ptr[1] + }; + vsetvlmax_e64m1(); + 
return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) +{ + double CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a)); + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr[2], ptr[3] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_cvt_f64(const v_int64x2& a) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfcvt_f_x_v_f64m1(a)); +} +#endif + +////////////// Broadcast ////////////// + +#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \ +template inline _Tpvec v_broadcast_element(_Tpvec v) \ +{ \ + return v_setall_##suffix(v_extract_n(v)); \ +} + +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint8x16, u8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint16x8, u16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32x4, u32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint64x2, u64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_int64x2, s64) +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BROADCAST(v_float64x2, f64) +#endif + +////////////// Transpose4x4 ////////////// + +#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, _Tp, suffix) \ +inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ + const v_##_Tpvec& a2, const v_##_Tpvec& a3, \ + v_##_Tpvec& b0, v_##_Tpvec& b1, \ + v_##_Tpvec& b2, v_##_Tpvec& b3) \ +{ \ + _Tp CV_DECL_ALIGNED(32) elems0[4] = \ + { \ + v_extract_n<0>(a0), \ + v_extract_n<0>(a1), \ + v_extract_n<0>(a2), \ + v_extract_n<0>(a3) \ + }; \ + b0 = v_load(elems0); \ + _Tp CV_DECL_ALIGNED(32) elems1[4] = \ + { \ + v_extract_n<1>(a0), \ + v_extract_n<1>(a1), \ + v_extract_n<1>(a2), \ + v_extract_n<1>(a3) \ + }; \ + b1 = v_load(elems1); \ + _Tp CV_DECL_ALIGNED(32) elems2[4] = \ + { \ + v_extract_n<2>(a0), \ + v_extract_n<2>(a1), \ + v_extract_n<2>(a2), \ + v_extract_n<2>(a3) \ + 
}; \ + b2 = v_load(elems2); \ + _Tp CV_DECL_ALIGNED(32) elems3[4] = \ + { \ + v_extract_n<3>(a0), \ + v_extract_n<3>(a1), \ + v_extract_n<3>(a2), \ + v_extract_n<3>(a3) \ + }; \ + b3 = v_load(elems3); \ +} + +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32) + +////////////// Reverse ////////////// + +#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, width, suffix) \ +inline _Tpvec v_reverse(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptra[_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + { \ + ptr[i] = ptra[_Tpvec::nlanes-i-1]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, 8, u8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, 8, i8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, 16, u16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, 16, i16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, 32, u32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, 32, i32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, 32, f32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, 64, u64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, 64, i64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, 64, f64) +#endif + +//////////// Value reordering //////////// + +#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_low(lptr, a); \ + v_store_high(hptr, a); \ + b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ + b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ + 
v_store_low(lptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + v_store_high(hptr, a); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr))); \ +} + +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1) + +inline v_uint32x4 v_load_expand_q(const uchar* ptr) +{ + vsetvlmax_e32m1(); + return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr)))); +} + +inline v_int32x4 v_load_expand_q(const schar* ptr) +{ + vsetvlmax_e32m1(); + return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr)))); +} + + +#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr) \ +inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0)); \ +} \ +inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0))); \ +} \ +template inline \ +_Tpvec v_rshr_pack(const _wTpvec& a, const 
_wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n)); \ +} \ +template inline \ +void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n))); \ +} + +OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1) +OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1) +OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1) +OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1) + + +#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast) \ +inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0)); \ +} \ +inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0))); \ +} \ +template inline \ +_Tpvec v_rshr_pack_u(const 
_wTpvec& a, const _wTpvec& b) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, b); \ + vsetvlmax_e##width##m2(); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n)); \ +} \ +template inline \ +void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \ +{ \ + _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + v_store(arr, a); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ + vsetvlmax_e##width##m2(); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n))); \ +} + +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2) + + +#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, width, suffix) \ +inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptra1[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb1[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra0, a0); \ + v_store(ptra1, a1); \ + int i; \ + for( i = 0; i < v_##_Tpvec::nlanes/2; i++ ) \ + { \ + ptrb0[i*2] = ptra0[i]; \ + ptrb0[i*2+1] = ptra1[i]; \ + } \ + for( ; i < v_##_Tpvec::nlanes; i++ ) \ + { \ + ptrb1[i*2-v_##_Tpvec::nlanes] = ptra0[i]; \ + ptrb1[i*2-v_##_Tpvec::nlanes+1] = ptra1[i]; \ + } \ + b0 = v_load(ptrb0); \ + b1 = v_load(ptrb1); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_low(ptra, a); \ + v_store_low(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline v_##_Tpvec 
v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_high(ptra, a); \ + v_store_high(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c = v_combine_low(a, b); \ + d = v_combine_high(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, 8, u8) +OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, 8, i8) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, 16, u16) +OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, 16, i16) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, 32, u32) +OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, 32, i32) +OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, 64, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + int i, i2; \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptra[i] = ptr[i2]; \ + ptrb[i] = ptr[i2+1]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + int i, i3; \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptra[i] = ptr[i3]; \ + ptrb[i] = ptr[i3+1]; \ + ptrc[i] = ptr[i3+2]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, 
v_##_Tpvec& d) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + int i, i4; \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptra[i] = ptr[i4]; \ + ptrb[i] = ptr[i4+1]; \ + ptrc[i] = ptr[i4+2]; \ + ptrd[i] = ptr[i4+3]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ + d = v_load(ptrd); \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i2; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptr[i2] = ptra[i]; \ + ptr[i2+1] = ptrb[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ +{ \ + int i, i3; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptr[i3] = ptra[i]; \ + ptr[i3+1] = ptrb[i]; \ + ptr[i3+2] = ptrc[i]; \ + } \ +} \ +inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \ + const v_##_Tpvec& c, const v_##_Tpvec& d, \ + hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \ +{ \ + int i, i4; \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = 
{0}; \ + v_store(ptra, a); \ + v_store(ptrb, b); \ + v_store(ptrc, c); \ + v_store(ptrd, d); \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptr[i4] = ptra[i]; \ + ptr[i4+1] = ptrb[i]; \ + ptr[i4+2] = ptrc[i]; \ + ptr[i4+3] = ptrd[i]; \ + } \ +} \ +inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \ + { \ + ptr[4*i ] = ptrvec[4*i ]; \ + ptr[4*i+1] = ptrvec[4*i+2]; \ + ptr[4*i+2] = ptrvec[4*i+1]; \ + ptr[4*i+3] = ptrvec[4*i+3]; \ + } \ + return v_load(ptr); \ +} \ +inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \ + { \ + ptr[8*i ] = ptrvec[4*i ]; \ + ptr[8*i+1] = ptrvec[4*i+4]; \ + ptr[8*i+2] = ptrvec[4*i+1]; \ + ptr[8*i+3] = ptrvec[4*i+5]; \ + ptr[8*i+4] = ptrvec[4*i+2]; \ + ptr[8*i+5] = ptrvec[4*i+6]; \ + ptr[8*i+6] = ptrvec[4*i+3]; \ + ptr[8*i+7] = ptrvec[4*i+7]; \ + } \ + return v_load(ptr); \ +} + +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar, u8, 8) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar, i8, 8) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort, u16, 16) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short, i16, 16) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned, u32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int, i32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float, f32, 32) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64, u64, 64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64, i64, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double, f64, 64) +#endif + +//////////// PopCount //////////// static const unsigned char popCountTable[] = { @@ -325,1354 +2329,571 @@ static 
const unsigned char popCountTable[] = 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -template -inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) -{ - v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); - for (int i = 0; i < n*(int)sizeof(_Tp); i++) - b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; - return b; -} - -template -inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) -{ - for( int i = 0; i < n; i++ ) - { - minval.s[i] = std::min(a.s[i], b.s[i]); - maxval.s[i] = std::max(a.s[i], b.s[i]); - } -} - -#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ -template \ -inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \ +inline _rTpvec v_popcount(const _Tpvec& a) \ { \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ - return c; \ + uchar CV_DECL_ALIGNED(32) ptra[16] = {0}; \ + v_store(ptra, v_reinterpret_as_u8(a)); \ + _rTp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_setzero_##suffix()); \ + for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \ + ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \ + return v_load(ptr); \ } -OPENCV_HAL_IMPL_CMP_OP(<) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_uint8x16, uchar, uchar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_int8x16, uchar, schar, u8) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_uint16x8, ushort, ushort, u16) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_int16x8, ushort, short, u16) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_uint32x4, unsigned, unsigned, u32) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_int32x4, unsigned, int, u32) 
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_uint64x2, uint64, uint64, u64) +OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) -OPENCV_HAL_IMPL_CMP_OP(>) +//////////// SignMask //////////// -OPENCV_HAL_IMPL_CMP_OP(<=) - -OPENCV_HAL_IMPL_CMP_OP(>=) - -OPENCV_HAL_IMPL_CMP_OP(==) - -OPENCV_HAL_IMPL_CMP_OP(!=) - -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} - -#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ -template \ -inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, width, shift) \ +inline int v_signmask(const _Tpvec& a) \ { \ - typedef _Tp2 rtype; \ - v_reg c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ - return c; \ + int mask = 0; \ + vsetvlmax_e##width##m1(); \ + _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift)); \ + for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + mask |= (int)(tmp.val[i]) << i; \ + return mask; \ } -OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 8, 7) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 16, 15) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 32, 31) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 64, 63) -OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) +inline int v_signmask(const v_int8x16& a) +{ return v_signmask(v_reinterpret_as_u8(a)); } +inline int v_signmask(const v_int16x8& a) +{ return v_signmask(v_reinterpret_as_u16(a)); } +inline int 
v_signmask(const v_int32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_u32(a)); } +inline int v_signmask(const v_int64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_u64(a)); } +#endif -OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) -template inline T _absdiff(T a, T b) -{ - return a > b ? a - b : b - a; -} +//////////// Scan forward //////////// -template -inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) -{ - typedef typename V_TypeTraits<_Tp>::abs_type rtype; - v_reg c; - const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); - for( int i = 0; i < n; i++ ) - { - rtype ua = a.s[i] ^ mask; - rtype ub = b.s[i] ^ mask; - c.s[i] = _absdiff(ua, ub); - } - return c; -} - -inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) -{ - v_float32x4 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) -{ - v_float64x2 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++) - c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); - return c; -} - -template -inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = 1.f/std::sqrt(a.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_sqr_magnitude(const 
v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; - return c; -} - -template -inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - v_reg<_Tp, n> d; - for( int i = 0; i < n; i++ ) - d.s[i] = a.s[i]*b.s[i] + c.s[i]; - return d; -} - -template -inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - return v_fma(a, b, c); -} - -template inline v_reg::w_type, n/2> -v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg c; - for( int i = 0; i < (n/2); i++ ) - c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; - return c; -} - -template inline v_reg::w_type, n/2> -v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::w_type, n / 2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg s; - for( int i = 0; i < (n/2); i++ ) - s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; - return s; -} - -template inline v_reg::w_type, n/2> -v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ return v_dotprod(a, b); } - -template inline v_reg::w_type, n/2> -v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::w_type, n / 2>& c) -{ return v_dotprod(a, b, c); } - -template inline v_reg::q_type, n/4> -v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; - return s; -} - -template inline v_reg::q_type, n/4> -v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::q_type, n / 4>& c) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - 
v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; - return s; -} - -template inline v_reg::q_type, n/4> -v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ return v_dotprod_expand(a, b); } - -template inline v_reg::q_type, n/4> -v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::q_type, n / 4>& c) -{ return v_dotprod_expand(a, b, c); } - -template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg::w_type, n/2>& c, - v_reg::w_type, n/2>& d) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i]*b.s[i]; - d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; - } -} - -template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg<_Tp, n> c; - for (int i = 0; i < n; i++) - c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); - return c; -} - -template inline void v_hsum(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; - } -} - -#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ -template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ +#define OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(_Tpvec, _Tp, suffix) \ +inline int v_scan_forward(const _Tpvec& a) \ { \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = (_Tp)(a.s[i] shift_op imm); \ - return c; \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + v_store(ptr, v_reinterpret_as_##suffix(a)); \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + if(int(ptr[i]) < 0) \ + return i; \ + return 0; \ } -OPENCV_HAL_IMPL_SHIFT_OP(<< ) 
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int8x16, schar, s8) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int16x8, short, s16) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int32x4, int, s32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float32x4, float, f32) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int64x2, int64, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64) +#endif -OPENCV_HAL_IMPL_SHIFT_OP(>> ) +//////////// Pack triplets //////////// -#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ +#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, _Tp) \ +inline _Tpvec v_pack_triplets(const _Tpvec& vec) \ { \ - v_reg<_Tp, n> b; \ - for (int i = 0; i < n; i++) \ + _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrvec[_Tpvec::nlanes] = {0}; \ + v_store(ptrvec, vec); \ + for (int i = 0; i < _Tpvec::nlanes/4; i++) \ { \ - int sIndex = i opA imm; \ - if (0 <= sIndex && sIndex < n) \ - { \ - b.s[i] = a.s[sIndex]; \ - } \ - else \ - { \ - b.s[i] = 0; \ - } \ + ptr[3*i ] = ptrvec[4*i ]; \ + ptr[3*i+1] = ptrvec[4*i+2]; \ + ptr[3*i+2] = ptrvec[4*i+2]; \ } \ - return b; \ -} \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for (int i = 0; i < n; i++) \ - { \ - int aIndex = i opA imm; \ - int bIndex = i opA imm opB n; \ - if (0 <= bIndex && bIndex < n) \ - { \ - c.s[i] = b.s[bIndex]; \ - } \ - else if (0 <= aIndex && aIndex < n) \ - { \ - c.s[i] = a.s[aIndex]; \ - } \ - else \ - { \ - c.s[i] = 0; \ - } \ - } \ - return c; \ + return v_load(ptr); \ } -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) 
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8x16, uchar) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8x16, schar) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16x8, ushort) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16x8, short) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32x4, int) +OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float) -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) -template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) +////// FP16 support /////// + +#if CV_FP16 +inline v_float32x4 v_load_expand(const float16_t* ptr) { - typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; - for( int i = 1; i < n; i++ ) - c += a.s[i]; - return c; + return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr))); } -inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, - const v_float32x4& c, const v_float32x4& d) +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) { - v_float32x4 r; - r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; - r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; - r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; - r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; - return r; + vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v)); +} +#else +inline v_float32x4 v_load_expand(const float16_t* ptr) +{ + const int N = 4; + float buf[N]; + for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i]; + return v_load(buf); } -template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) +inline void v_pack_store(float16_t* ptr, const v_float32x4& v) { - typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); - for (int i = 1; i < n; i++) - c += _absdiff(a.s[i], b.s[i]); - return c; + const int N = 4; + float buf[N]; + v_store(buf, v); + for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]); } - -template inline int v_signmask(const v_reg<_Tp, n>& a) -{ - 
int mask = 0; - for( int i = 0; i < n; i++ ) - mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; - return mask; -} - -template inline int v_scan_forward(const v_reg<_Tp, n>& a) -{ - for (int i = 0; i < n; i++) - if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) - return i; - return 0; -} - -template inline bool v_check_all(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) - return false; - return true; -} - -template inline bool v_check_any(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) - return true; - return false; -} - -template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, - const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef V_TypeTraits<_Tp> Traits; - typedef typename Traits::int_type int_type; - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - { - int_type m = Traits::reinterpret_int(mask.s[i]); - CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc - c.s[i] = m ? 
a.s[i] : b.s[i]; - } - return c; -} - -template inline void v_expand(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& b0, - v_reg::w_type, n/2>& b1) -{ - for( int i = 0; i < (n/2); i++ ) - { - b0.s[i] = a.s[i]; - b1.s[i] = a.s[i+(n/2)]; - } -} - -template -inline v_reg::w_type, n/2> -v_expand_low(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i]; - return b; -} - -template -inline v_reg::w_type, n/2> -v_expand_high(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i+(n/2)]; - return b; -} - -template inline v_reg::int_type, n> - v_reinterpret_as_int(const v_reg<_Tp, n>& a) -{ - v_reg::int_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); - return c; -} - -template inline v_reg::uint_type, n> - v_reinterpret_as_uint(const v_reg<_Tp, n>& a) -{ - v_reg::uint_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); - return c; -} - -template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, - v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) -{ - int i; - for( i = 0; i < n/2; i++ ) - { - b0.s[i*2] = a0.s[i]; - b0.s[i*2+1] = a1.s[i]; - } - for( ; i < n; i++ ) - { - b1.s[i*2-n] = a0.s[i]; - b1.s[i*2-n+1] = a1.s[i]; - } -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); #endif - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); + +////////////// Rounding ////////////// + +inline v_int32x4 v_round(const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(a)); } -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +inline v_int32x4 v_floor(const v_float32x4& a) { - CV_Assert(isAligned::nlanes128>)>(ptr)); - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a 
- ZP5; + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(t)); } -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) +inline v_int32x4 v_ceil(const v_float32x4& a) { -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); + v_float32x4 ZP5 = v_setall_f32(0.5f); + v_float32x4 t = a + ZP5; + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_x_f_v_i32m1(t)); +} + +inline v_int32x4 v_trunc(const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a)); +} +#if CV_SIMD128_64F +inline v_int32x4 v_round(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + double arr[4] = {a.val[0], a.val[1], 0, 0}; + vsetvlmax_e64m2(); + vfloat64m2_t tmp = vle64_v_f64m2(arr); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp)); +} #endif - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = ptr[i]; - } - return c; + + +//////// Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int32x4 t1, t2; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, 
vwmul_vv_i32m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int32x4 t1, t2; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; } -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) +// 32 >> 64 +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) { -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(loptr)); - CV_Assert(isAligned(hiptr)); + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + v_int64x2 t1, t2; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2; +} +inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + v_int64x2 t1, t2; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_load_deinterleave(ptr, t1, t2); + return t1 + t2 + c; +} + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, + const v_uint32x4& c) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_uint32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + 
v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, + const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_int32x4 t1, t2, t3, t4; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_uint64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, + const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + v_int64x2 t1, t2, t3, t4; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_load_deinterleave(ptr, t1, t2, t3, t4); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod(a, b)); } +inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, + const v_float64x2& c) +{ return v_dotprod_expand(a, b) + c; } #endif 
- v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = loptr[i]; - c.s[i+c.nlanes/2] = hiptr[i]; - } - return c; + +//////// Fast Dot Product //////// + +// 16 >> 32 +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2; +} +inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e32m2(); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + return t1 + t2 + c; } -template -inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> -v_load_expand(const _Tp* ptr) +// 32 >> 64 +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) { -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2; +} +inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + vsetvlmax_e64m2(); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + return t1 + t2 + c; +} + + +// 8 >> 32 +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) +{ + unsigned 
CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, 
vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } #endif - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} -template -inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> -v_load_expand_q(const _Tp* ptr) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - a.s[i] = ptr[i2]; - b.s[i] = ptr[i2+1]; - } -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - a.s[i] = ptr[i3]; - b.s[i] = ptr[i3+1]; - c.s[i] = ptr[i3+2]; - } -} - -template -inline void 
v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, - v_reg<_Tp, n>& d) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - a.s[i] = ptr[i4]; - b.s[i] = ptr[i4+1]; - c.s[i] = ptr[i4+2]; - d.s[i] = ptr[i4+3]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - ptr[i2] = a.s[i]; - ptr[i2+1] = b.s[i]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - ptr[i3] = a.s[i]; - ptr[i3+1] = b.s[i]; - ptr[i3+2] = c.s[i]; - } -} - -template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - ptr[i4] = a.s[i]; - ptr[i4+1] = b.s[i]; - ptr[i4+2] = c.s[i]; - ptr[i4+3] = d.s[i]; - } -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - v_store(ptr, a); -} - -template -inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i]; -} 
- -template -inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i+(n/2)]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i]; - c.s[i+(n/2)] = b.s[i]; - } - return c; -} - -template -inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i+(n/2)]; - c.s[i+(n/2)] = b.s[i+(n/2)]; - } - return c; -} - -template -inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) -{ - for( int i = 0; i < (n/2); i++ ) - { - low.s[i] = a.s[i]; - low.s[i+(n/2)] = b.s[i]; - high.s[i] = a.s[i+(n/2)]; - high.s[i+(n/2)] = b.s[i+(n/2)]; - } -} - -template -inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = a.s[n-i-1]; - return c; -} - -template -inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> r; - const int shift = n - s; - int i = 0; - for (; i < shift; ++i) - r.s[i] = a.s[i+s]; - for (; i < n; ++i) - r.s[i] = b.s[i-shift]; - return r; -} - -template -inline _Tp v_extract_n(const v_reg<_Tp, n>& v) -{ - CV_DbgAssert(s >= 0 && s < n); - return v.s[s]; -} - -template -inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) -{ - CV_DbgAssert(i 
>= 0 && i < n); - return v_reg<_Tp, n>::all(a.s[i]); -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvRound(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = cvRound(b.s[i]); - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvFloor(a.s[i]); - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvCeil(a.s[i]); - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (int)(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvFloor(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (float)a.s[i]; - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = (float)b.s[i]; - } - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - 
for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - - -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i]]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 2] + i % 2]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 4] + i % 4]; - return c; -} - -template inline v_reg v_lut(const int* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) -{ - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const float* tab, const v_reg& idx) -{ - v_reg c; - 
for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const double* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - - -inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - - -template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[4*i ] = vec.s[4*i ]; - c.s[4*i+1] = vec.s[4*i+2]; - c.s[4*i+2] = vec.s[4*i+1]; - c.s[4*i+3] = vec.s[4*i+3]; - } - return c; -} - -template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/8; i++) - { - c.s[8*i ] = vec.s[8*i ]; - c.s[8*i+1] = vec.s[8*i+4]; - c.s[8*i+2] = vec.s[8*i+1]; - c.s[8*i+3] = vec.s[8*i+5]; - c.s[8*i+4] = vec.s[8*i+2]; - c.s[8*i+5] = vec.s[8*i+6]; - c.s[8*i+6] = vec.s[8*i+3]; - c.s[8*i+7] = vec.s[8*i+7]; - } - return c; -} - -template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[3*i ] = vec.s[4*i ]; - c.s[3*i+1] = vec.s[4*i+1]; - c.s[3*i+2] = vec.s[4*i+2]; - } - return c; -} - -template -inline void 
v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) -{ - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); -} - -#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } - -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } - -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ - v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } - -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, 
uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } - -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } - -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ -{ \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < 
_Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) 
\ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>(a.s[i]); \ -} - -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -template -inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - for (int i = 0; i < n; ++i) - { - mptr[i] = (_Tpm)a.s[i]; - 
mptr[i + n] = (_Tpm)b.s[i]; - } -} - - - -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - return mask; -} - - -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); - return mask; -} inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& m3) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3); + return v_float32x4(res); } inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) + const v_float32x4& a) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = 
vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + return v_float32x4(res) + a; +} + +#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width) \ +inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ +{ \ + _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \ + vsetvlmax_e##width##m2(); \ + vse##width##_v_##suffix##m2(ptr, wmul(a, b)); \ + vsetvlmax_e##width##m1(); \ + c = _Tpwvec(vle##width##_v_##suffix##m1(ptr)); \ + d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes)); \ +} + +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64) + + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + vsetvlmax_e16m1(); + return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b), 16)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + vsetvlmax_e16m1(); + return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b), 16)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) -{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) -{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } +//////// Saturating Multiply //////// -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) -{ return v_dotprod_expand(a, b); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) -{ return 
v_dotprod_expand(a, b, c); } - -////// FP16 support /////// - -inline v_reg::nlanes128> -v_load_expand(const float16_t* ptr) -{ - v_reg::nlanes128> v; - for( int i = 0; i < v.nlanes; i++ ) - { - v.s[i] = ptr[i]; - } - return v; +#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _wTpvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _wTpvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a = a * b; \ + return a; \ } -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) -{ - for( int i = 0; i < v.nlanes; i++ ) - { - ptr[i] = float16_t(v.s[i]); - } -} +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16x8, v_int32x4) + inline void v_cleanup() {} - -#ifndef CV_DOXYGEN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END -#endif + + } #endif diff --git a/platforms/linux/riscv64-gcc.toolchain.cmake b/platforms/linux/riscv64-gcc.toolchain.cmake new file mode 100644 index 0000000000..c46d62a360 --- /dev/null +++ b/platforms/linux/riscv64-gcc.toolchain.cmake @@ -0,0 +1,20 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(RISCV_GCC_INSTALL_ROOT /opt/RISCV CACHE PATH "Path to GCC for RISC-V cross compiler installation directory") +set(CMAKE_SYSROOT ${RISCV_GCC_INSTALL_ROOT}/sysroot CACHE PATH "RISC-V sysroot") + +set(CMAKE_C_COMPILER ${RISCV_GCC_INSTALL_ROOT}/bin/riscv64-unknown-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER ${RISCV_GCC_INSTALL_ROOT}/bin/riscv64-unknown-linux-gnu-g++) + +# Don't run the linker on compiler check +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set(CMAKE_C_FLAGS "-march=rv64gcv_zvqmac ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "-march=rv64gcv_zvqmac ${CXX_FLAGS}") + +set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT}) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) \ No newline at end of file From d35e2f533905a65cf1b5c3ff7b5cfc11b210aaf9 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 2 Dec 2020 00:21:19 +0000 Subject: [PATCH 022/422] core(ipp): workaround getIppTopFeatures() value mismatch --- modules/core/src/system.cpp | 15 +++++++++------ modules/ts/src/ts.cpp | 4 +++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index fcb9ea45ef..ad688a6c68 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -2429,6 +2429,13 @@ public: ippTopFeatures = ippCPUID_SSE42; pIppLibInfo = ippiGetLibVersion(); + + // workaround: https://github.com/opencv/opencv/issues/12959 + std::string ippName(pIppLibInfo->Name ? pIppLibInfo->Name : ""); + if (ippName.find("SSE4.2") != std::string::npos) + { + ippTopFeatures = ippCPUID_SSE42; + } } public: @@ -2468,16 +2475,12 @@ int getIppFeatures() #endif } -unsigned long long getIppTopFeatures(); - +#ifdef HAVE_IPP unsigned long long getIppTopFeatures() { -#ifdef HAVE_IPP return getIPPSingleton().ippTopFeatures; -#else - return 0; -#endif } +#endif void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line) { diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index bad799dc4d..13f5eff251 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -1122,7 +1122,9 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&) } recordPropertyVerbose("cv_cpu_features", "CPU features", cv::getCPUFeaturesLine()); #ifdef HAVE_IPP - recordPropertyVerbose("cv_ipp_version", "Intel(R) IPP version", cv::ipp::useIPP() ? cv::ipp::getIppVersion() : "disabled"); + recordPropertyVerbose("cv_ipp_version", "Intel(R) IPP version", cv::ipp::useIPP() ? 
cv::ipp::getIppVersion() : "disabled"); + if (cv::ipp::useIPP()) + recordPropertyVerbose("cv_ipp_features", "Intel(R) IPP features code", cv::format("0x%llx", cv::ipp::getIppTopFeatures())); #endif #ifdef HAVE_OPENCL cv::dumpOpenCLInformation(); From c42d47d94ad5fb1343ed70ce1c6a73dbe7073900 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 2 Dec 2020 12:34:24 +0000 Subject: [PATCH 023/422] cmake: clean cached INTERNAL variable used for 3rdparty deps --- cmake/OpenCVFindLibsGrfmt.cmake | 8 +++++++- cmake/OpenCVUtils.cmake | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 4ad44fe833..f99bb33c80 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -6,6 +6,7 @@ if(BUILD_ZLIB) ocv_clear_vars(ZLIB_FOUND) else() + ocv_clear_internal_cache_vars(ZLIB_LIBRARY ZLIB_INCLUDE_DIR) find_package(ZLIB "${MIN_VER_ZLIB}") if(ZLIB_FOUND AND ANDROID) if(ZLIB_LIBRARIES MATCHES "/usr/(lib|lib32|lib64)/libz.so$") @@ -31,11 +32,12 @@ if(WITH_JPEG) if(BUILD_JPEG) ocv_clear_vars(JPEG_FOUND) else() + ocv_clear_internal_cache_vars(JPEG_LIBRARY JPEG_INCLUDE_DIR) include(FindJPEG) endif() if(NOT JPEG_FOUND) - ocv_clear_vars(JPEG_LIBRARY JPEG_LIBRARIES JPEG_INCLUDE_DIR) + ocv_clear_vars(JPEG_LIBRARY JPEG_INCLUDE_DIR) if(NOT BUILD_JPEG_TURBO_DISABLE) set(JPEG_LIBRARY libjpeg-turbo CACHE INTERNAL "") @@ -76,6 +78,7 @@ if(WITH_TIFF) if(BUILD_TIFF) ocv_clear_vars(TIFF_FOUND) else() + ocv_clear_internal_cache_vars(TIFF_LIBRARY TIFF_INCLUDE_DIR) include(FindTIFF) if(TIFF_FOUND) ocv_parse_header("${TIFF_INCLUDE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION) @@ -119,6 +122,7 @@ if(WITH_WEBP) if(BUILD_WEBP) ocv_clear_vars(WEBP_FOUND WEBP_LIBRARY WEBP_LIBRARIES WEBP_INCLUDE_DIR) else() + ocv_clear_internal_cache_vars(WEBP_LIBRARY WEBP_INCLUDE_DIR) include(cmake/OpenCVFindWebP.cmake) if(WEBP_FOUND) 
set(HAVE_WEBP 1) @@ -184,6 +188,7 @@ if(WITH_PNG) if(BUILD_PNG) ocv_clear_vars(PNG_FOUND) else() + ocv_clear_internal_cache_vars(PNG_LIBRARY PNG_INCLUDE_DIR) include(FindPNG) if(PNG_FOUND) include(CheckIncludeFile) @@ -215,6 +220,7 @@ endif() if(WITH_OPENEXR) ocv_clear_vars(HAVE_OPENEXR) if(NOT BUILD_OPENEXR) + ocv_clear_internal_cache_vars(OPENEXR_INCLUDE_PATHS OPENEXR_LIBRARIES OPENEXR_ILMIMF_LIBRARY OPENEXR_VERSION) include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindOpenEXR.cmake") endif() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 693a840ffe..6ae2cbcf8b 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -400,6 +400,24 @@ macro(ocv_clear_vars) endforeach() endmacro() + +# Clears passed variables with INTERNAL type from CMake cache +macro(ocv_clear_internal_cache_vars) + foreach(_var ${ARGN}) + get_property(_propertySet CACHE ${_var} PROPERTY TYPE SET) + if(_propertySet) + get_property(_type CACHE ${_var} PROPERTY TYPE) + if(_type STREQUAL "INTERNAL") + message("Cleaning INTERNAL cached variable: ${_var}") + unset(${_var} CACHE) + endif() + endif() + endforeach() + unset(_propertySet) + unset(_type) +endmacro() + + set(OCV_COMPILER_FAIL_REGEX "argument .* is not valid" # GCC 9+ (including support of unicode quotes) "command[- ]line option .* is valid for .* but not for C\\+\\+" # GNU From 6f8120cb3a2b9613bb4811d37ae4efe54265611e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 2 Dec 2020 15:02:43 +0000 Subject: [PATCH 024/422] core(UMat): drop unavailable methods --- modules/core/include/opencv2/core/mat.hpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 05edeac523..0922db9084 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -2432,20 +2432,11 @@ public: UMat(const UMat& m, const Rect& roi); UMat(const UMat& m, const Range* ranges); 
UMat(const UMat& m, const std::vector& ranges); + + // FIXIT copyData=false is not implemented, drop this in favor of cv::Mat (OpenCV 5.0) //! builds matrix from std::vector with or without copying the data template explicit UMat(const std::vector<_Tp>& vec, bool copyData=false); - //! builds matrix from cv::Vec; the data is copied by default - template explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true); - //! builds matrix from cv::Matx; the data is copied by default - template explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true); - //! builds matrix from a 2D point - template explicit UMat(const Point_<_Tp>& pt, bool copyData=true); - //! builds matrix from a 3D point - template explicit UMat(const Point3_<_Tp>& pt, bool copyData=true); - //! builds matrix from comma initializer - template explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer); - //! destructor - calls release() ~UMat(); //! assignment operators From 8e32566583c5249249aa044b93e497aeb35ae1aa Mon Sep 17 00:00:00 2001 From: Kong Liangqian Date: Tue, 24 Nov 2020 18:35:39 +0800 Subject: [PATCH 025/422] Add adding and subtraction operations between a number and a quaternion; fix a typo; Add documentation of quaternion operators; Restrict the type of scalar: the same as quaternion; --- .../core/include/opencv2/core/quaternion.hpp | 365 ++++++++++++++++-- .../include/opencv2/core/quaternion.inl.hpp | 45 ++- modules/core/test/test_quaternion.cpp | 16 +- 3 files changed, 389 insertions(+), 37 deletions(-) diff --git a/modules/core/include/opencv2/core/quaternion.hpp b/modules/core/include/opencv2/core/quaternion.hpp index c72ee8c37f..7bc51e6c6d 100644 --- a/modules/core/include/opencv2/core/quaternion.hpp +++ b/modules/core/include/opencv2/core/quaternion.hpp @@ -277,17 +277,18 @@ public: * For example * ``` * Quatd q(1,2,3,4); - * power(q, 2); + * power(q, 2.0); * * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; * double angle = CV_PI; * Vec3d axis{0, 0, 1}; * Quatd q1 = 
Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle - * power(q1, 2, assumeUnit);//This assumeUnit means q1 is a unit quaternion. + * power(q1, 2.0, assumeUnit);//This assumeUnit means q1 is a unit quaternion. * ``` + * @note the type of the index should be the same as the quaternion. */ - template - friend Quat power(const Quat &q, _T x, QuatAssumeType assumeUnit); + template + friend Quat power(const Quat &q, const T x, QuatAssumeType assumeUnit); /** * @brief return the value of power function with index \f$x\f$. @@ -298,17 +299,16 @@ public: * For example * ``` * Quatd q(1,2,3,4); - * q.power(2); + * q.power(2.0); * * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT; * double angle = CV_PI; * Vec3d axis{0, 0, 1}; * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle - * q1.power(2, assumeUnit); //This assumeUnt means q1 is a unit quaternion + * q1.power(2.0, assumeUnit); //This assumeUnt means q1 is a unit quaternion * ``` */ - template - Quat<_Tp> power(_T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + Quat<_Tp> power(const _Tp x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; /** * @brief return \f$\sqrt{q}\f$. @@ -859,6 +859,7 @@ public: * * @sa toRotMat3x3 */ + Matx<_Tp, 4, 4> toRotMat4x4(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; /** @@ -1073,42 +1074,362 @@ public: const Quat<_Tp> &q2, const Quat<_Tp> &q3, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); - + /** + * @brief Return opposite quaternion \f$-p\f$ + * which satisfies \f$p + (-p) = 0.\f$ + * + * For example + * ``` + * Quatd q{1, 2, 3, 4}; + * std::cout << -q << std::endl; // [-1, -2, -3, -4] + * ``` + */ Quat<_Tp> operator-() const; + /** + * @brief return true if two quaternions p and q are nearly equal, i.e. when the absolute + * value of each \f$p_i\f$ and \f$q_i\f$ is less than CV_QUAT_EPS. 
+ */ bool operator==(const Quat<_Tp>&) const; + /** + * @brief Addition operator of two quaternions p and q. + * It returns a new quaternion that each value is the sum of \f$p_i\f$ and \f$q_i\f$. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p + q << std::endl; //[6, 8, 10, 12] + * ``` + */ Quat<_Tp> operator+(const Quat<_Tp>&) const; + /** + * @brief Addition assignment operator of two quaternions p and q. + * It adds right operand to the left operand and assign the result to left operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p += q; // equivalent to p = p + q + * std::cout << p << std::endl; //[6, 8, 10, 12] + * + * ``` + */ Quat<_Tp>& operator+=(const Quat<_Tp>&); + /** + * @brief Subtraction operator of two quaternions p and q. + * It returns a new quaternion that each value is the sum of \f$p_i\f$ and \f$-q_i\f$. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p - q << std::endl; //[-4, -4, -4, -4] + * ``` + */ Quat<_Tp> operator-(const Quat<_Tp>&) const; + /** + * @brief Subtraction assignment operator of two quaternions p and q. + * It subtracts right operand from the left operand and assign the result to left operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p -= q; // equivalent to p = p - q + * std::cout << p << std::endl; //[-4, -4, -4, -4] + * + * ``` + */ Quat<_Tp>& operator-=(const Quat<_Tp>&); + /** + * @brief Multiplication assignment operator of two quaternions q and p. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication: + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\ + * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}]. 
+ * \end{split} + * \end{equation} + * \f] + * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p *= q; // equivalent to p = p * q + * std::cout << p << std::endl; //[-60, 12, 30, 24] + * ``` + */ Quat<_Tp>& operator*=(const Quat<_Tp>&); - Quat<_Tp>& operator*=(const _Tp&); + /** + * @brief Multiplication assignment operator of a quaternions and a scalar. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * p *= s; // equivalent to p = p * s + * std::cout << p << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + Quat<_Tp>& operator*=(const _Tp s); + /** + * @brief Multiplication operator of two quaternions q and p. + * Multiplies values on either side of the operator. + * + * Rule of quaternion multiplication: + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\ + * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}]. + * \end{split} + * \end{equation} + * \f] + * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p * q << std::endl; //[-60, 12, 30, 24] + * ``` + */ Quat<_Tp> operator*(const Quat<_Tp>&) const; - Quat<_Tp> operator/(const _Tp&) const; + /** + * @brief Division operator of a quaternions and a scalar. + * It divides left operand with the right operand and assign the result to left operand. 
+ * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z] / s\\ + * &=[w/s, x/s, y/s, z/s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * p /= s; // equivalent to p = p / s + * std::cout << p << std::endl; //[0.5, 1, 1.5, 2] + * ``` + * @note the type of scalar should be equal to this quaternion. + */ + Quat<_Tp> operator/(const _Tp s) const; + /** + * @brief Division operator of two quaternions p and q. + * Divides left hand operand by right hand operand. + * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / q &= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * std::cout << p / q << std::endl; // equivalent to p * q.inv() + * ``` + */ Quat<_Tp> operator/(const Quat<_Tp>&) const; - Quat<_Tp>& operator/=(const _Tp&); + /** + * @brief Division assignment operator of a quaternions and a scalar. + * It divides left operand with the right operand and assign the result to left operand. + * + * Rule of quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z] / s\\ + * &=[w / s, x / s, y / s, z / s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0;; + * p /= s; // equivalent to p = p / s + * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + Quat<_Tp>& operator/=(const _Tp s); + /** + * @brief Division assignment operator of two quaternions p and q; + * It divides left operand with the right operand and assign the result to left operand. 
+ * + * Rule of quaternion division with a quaternion: + * \f[ + * \begin{equation} + * \begin{split} + * p / q&= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * Quatd q{5, 6, 7, 8}; + * p /= q; // equivalent to p = p * q.inv() + * std::cout << p << std::endl; + * ``` + */ Quat<_Tp>& operator/=(const Quat<_Tp>&); _Tp& operator[](std::size_t n); const _Tp& operator[](std::size_t n) const; - template - friend Quat cv::operator*(const T, const Quat&); + /** + * @brief Subtraction operator of a scalar and a quaternions. + * Subtracts right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator-(const T s, const Quat&); - template - friend Quat cv::operator*(const Quat&, const T); + /** + * @brief Subtraction operator of a quaternions and a scalar. + * Subtracts right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << p - scalar << std::endl; //[-1.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator-(const Quat&, const T s); + + /** + * @brief Addition operator of a quaternions and a scalar. + * Adds right hand operand from left hand operand. + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator+(const T s, const Quat&); + + /** + * @brief Addition operator of a quaternions and a scalar. + * Adds right hand operand from left hand operand. 
+ * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double scalar = 2.0; + * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator+(const Quat&, const T s); + + /** + * @brief Multiplication operator of a scalar and a quaternions. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * std::cout << s * p << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. + */ + template + friend Quat cv::operator*(const T s, const Quat&); + + /** + * @brief Multiplication operator of a quaternion and a scalar. + * It multiplies right operand with the left operand and assign the result to left operand. + * + * Rule of quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z] * s\\ + * &=[w * s, x * s, y * s, z * s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * Quatd p{1, 2, 3, 4}; + * double s = 2.0; + * std::cout << p * s << std::endl; //[2.0, 4.0, 6.0, 8.0] + * ``` + * @note the type of scalar should be equal to the quaternion. 
+ */ + template + friend Quat cv::operator*(const Quat&, const T s); template friend std::ostream& cv::operator<<(std::ostream&, const Quat&); @@ -1165,8 +1486,8 @@ Quat exp(const Quat &q); template Quat log(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); -template -Quat power(const Quat& q, _T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); +template +Quat power(const Quat& q, const T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); template Quat crossProduct(const Quat &p, const Quat &q); @@ -1174,11 +1495,11 @@ Quat crossProduct(const Quat &p, const Quat &q); template Quat sqrt(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); -template -Quat operator*(const T, const Quat&); +template +Quat operator*(const T, const Quat&); -template -Quat operator*(const Quat&, const T); +template +Quat operator*(const Quat&, const T); template std::ostream& operator<<(std::ostream&, const Quat&); diff --git a/modules/core/include/opencv2/core/quaternion.inl.hpp b/modules/core/include/opencv2/core/quaternion.inl.hpp index 769f53ed4b..f665dbe6c8 100644 --- a/modules/core/include/opencv2/core/quaternion.inl.hpp +++ b/modules/core/include/opencv2/core/quaternion.inl.hpp @@ -148,6 +148,30 @@ inline Quat Quat::operator+(const Quat &q1) const return Quat(w + q1.w, x + q1.x, y + q1.y, z + q1.z); } +template +inline Quat operator+(const T a, const Quat& q) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator+(const Quat& q, const T a) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator-(const T a, const Quat& q) +{ + return Quat(a - q.w, -q.x, -q.y, -q.z); +} + +template +inline Quat operator-(const Quat& q, const T a) +{ + return Quat(q.w - a, q.x, q.y, q.z); +} + template inline Quat Quat::operator-(const Quat &q1) const { @@ -183,14 +207,14 @@ inline Quat Quat::operator*(const Quat &q1) const } -template -Quat operator*(const Quat &q1, const S a) +template +Quat operator*(const Quat &q1, const T 
a) { return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); } -template -Quat operator*(const S a, const Quat &q1) +template +Quat operator*(const T a, const Quat &q1) { return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); } @@ -221,7 +245,7 @@ inline Quat& Quat::operator/=(const Quat &q1) return *this; } template -Quat& Quat::operator*=(const T &q1) +Quat& Quat::operator*=(const T q1) { w *= q1; x *= q1; @@ -231,7 +255,7 @@ Quat& Quat::operator*=(const T &q1) } template -inline Quat& Quat::operator/=(const T &a) +inline Quat& Quat::operator/=(const T a) { const T a_inv = 1.0 / a; w *= a_inv; @@ -242,7 +266,7 @@ inline Quat& Quat::operator/=(const T &a) } template -inline Quat Quat::operator/(const T &a) const +inline Quat Quat::operator/(const T a) const { const T a_inv = 1.0 / a; return Quat(w * a_inv, x * a_inv, y * a_inv, z * a_inv); @@ -353,15 +377,14 @@ Quat Quat::log(QuatAssumeType assumeUnit) const return Quat(std::log(qNorm), v[0] * k, v[1] * k, v[2] *k); } -template -inline Quat power(const Quat &q1, _T alpha, QuatAssumeType assumeUnit) +template +inline Quat power(const Quat &q1, const T alpha, QuatAssumeType assumeUnit) { return q1.power(alpha, assumeUnit); } template -template -inline Quat Quat::power(_T alpha, QuatAssumeType assumeUnit) const +inline Quat Quat::power(const T alpha, QuatAssumeType assumeUnit) const { if (x * x + y * y + z * z > CV_QUAT_EPS) { diff --git a/modules/core/test/test_quaternion.cpp b/modules/core/test/test_quaternion.cpp index 0025674ec7..324d535bff 100644 --- a/modules/core/test/test_quaternion.cpp +++ b/modules/core/test/test_quaternion.cpp @@ -18,7 +18,7 @@ protected: } double scalar = 2.5; double angle = CV_PI; - int qNorm2 = 2; + double qNorm2 = 2; Vec axis{1, 1, 1}; Vec unAxis{0, 0, 0}; Vec unitAxis{1.0 / sqrt(3), 1.0 / sqrt(3), 1.0 / sqrt(3)}; @@ -124,7 +124,7 @@ TEST_F(QuatTest, basicfuns){ EXPECT_EQ(exp(qNull), qIdentity); EXPECT_EQ(exp(Quatd(0, angle * unitAxis[0] / 2, angle * unitAxis[1] / 2, angle * unitAxis[2] / 
2)), q3); - EXPECT_EQ(power(q3, 2), Quatd::createFromAngleAxis(2*angle, axis)); + EXPECT_EQ(power(q3, 2.0), Quatd::createFromAngleAxis(2*angle, axis)); EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), 2.0, assumeUnit), Quatd(-0.5,0.5,0.5,0.5)); EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), -2.0), Quatd(-0.5,-0.5,-0.5,-0.5)); EXPECT_EQ(sqrt(q1), power(q1, 0.5)); @@ -160,7 +160,7 @@ TEST_F(QuatTest, basicfuns){ EXPECT_EQ(tan(atan(q1)), q1); } -TEST_F(QuatTest, opeartor){ +TEST_F(QuatTest, operator){ Quatd minusQ{-1, -2, -3, -4}; Quatd qAdd{3.5, 0, 6.5, 8}; Quatd qMinus{-1.5, 4, -0.5, 0}; @@ -171,7 +171,15 @@ TEST_F(QuatTest, opeartor){ EXPECT_EQ(-q1, minusQ); EXPECT_EQ(q1 + q2, qAdd); + EXPECT_EQ(q1 + scalar, Quatd(3.5, 2, 3, 4)); + EXPECT_EQ(scalar + q1, Quatd(3.5, 2, 3, 4)); + EXPECT_EQ(q1 + 2.0, Quatd(3, 2, 3, 4)); + EXPECT_EQ(2.0 + q1, Quatd(3, 2, 3, 4)); EXPECT_EQ(q1 - q2, qMinus); + EXPECT_EQ(q1 - scalar, Quatd(-1.5, 2, 3, 4)); + EXPECT_EQ(scalar - q1, Quatd(1.5, -2, -3, -4)); + EXPECT_EQ(q1 - 2.0, Quatd(-1, 2, 3, 4)); + EXPECT_EQ(2.0 - q1, Quatd(1, -2, -3, -4)); EXPECT_EQ(q1 * q2, qMultq); EXPECT_EQ(q1 * scalar, qMults); EXPECT_EQ(scalar * q1, qMults); @@ -252,4 +260,4 @@ TEST_F(QuatTest, interpolation){ } // namespace -}// opencv_test \ No newline at end of file +}// opencv_test From 2fa624aef0bc681c37e8bb267401c54a9e4c1df9 Mon Sep 17 00:00:00 2001 From: Jaime Rivera Date: Sun, 29 Nov 2020 21:17:24 -0800 Subject: [PATCH 026/422] Add Timestamps to MSMF Video Capture by index Enable frame timestamp tests for MSMF Add functional test for camera live timestamps Remove trailing whitespace Add timestamp test to all functional tests. 
Protect div by 0 Add Timestamps to MSMF Video Capture by index --- modules/videoio/src/cap_msmf.cpp | 5 ++-- modules/videoio/test/test_camera.cpp | 34 ++++++++++++++++++++++++-- modules/videoio/test/test_video_io.cpp | 2 +- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 15b1d2ade7..7caa7c1ea0 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -346,8 +346,6 @@ public: STDMETHODIMP OnReadSample(HRESULT hrStatus, DWORD dwStreamIndex, DWORD dwStreamFlags, LONGLONG llTimestamp, IMFSample *pSample) CV_OVERRIDE { - CV_UNUSED(llTimestamp); - HRESULT hr = 0; cv::AutoLock lock(m_mutex); @@ -360,6 +358,7 @@ public: { CV_LOG_DEBUG(NULL, "videoio(MSMF): drop frame (not processed)"); } + m_lastSampleTimestamp = llTimestamp; m_lastSample = pSample; } } @@ -439,6 +438,7 @@ public: IMFSourceReader *m_reader; DWORD m_dwStreamIndex; + LONGLONG m_lastSampleTimestamp; _ComPtr m_lastSample; }; @@ -912,6 +912,7 @@ bool CvCapture_MSMF::grabFrame() CV_LOG_WARNING(NULL, "videoio(MSMF): EOS signal. Capture stream is lost"); return false; } + sampleTime = reader->m_lastSampleTimestamp; return true; } else if (isOpen) diff --git a/modules/videoio/test/test_camera.cpp b/modules/videoio/test/test_camera.cpp index d816f637a7..e82285ad5e 100644 --- a/modules/videoio/test/test_camera.cpp +++ b/modules/videoio/test/test_camera.cpp @@ -11,21 +11,51 @@ namespace opencv_test { namespace { -static void test_readFrames(/*const*/ VideoCapture& capture, const int N = 100, Mat* lastFrame = NULL) +static void test_readFrames(/*const*/ VideoCapture& capture, const int N = 100, Mat* lastFrame = NULL, bool testTimestamps = true) { Mat frame; int64 time0 = cv::getTickCount(); + int64 sysTimePrev = time0; + const double cvTickFreq = cv::getTickFrequency(); + + double camTimePrev = 0.0; + const double fps = capture.get(cv::CAP_PROP_FPS); + const double framePeriod = fps == 0.0 ? 1. 
: 1.0 / fps; + + const bool validTickAndFps = cvTickFreq != 0 && fps != 0.; + testTimestamps &= validTickAndFps; + for (int i = 0; i < N; i++) { SCOPED_TRACE(cv::format("frame=%d", i)); capture >> frame; + const int64 sysTimeCurr = cv::getTickCount(); + const double camTimeCurr = capture.get(cv::CAP_PROP_POS_MSEC); ASSERT_FALSE(frame.empty()); + // Do we have a previous frame? + if (i > 0 && testTimestamps) + { + const double sysTimeElapsedSecs = (sysTimeCurr - sysTimePrev) / cvTickFreq; + const double camTimeElapsedSecs = (camTimeCurr - camTimePrev) / 1000.; + + // Check that the time between two camera frames and two system time calls + // are within 1.5 frame periods of one another. + // + // 1.5x is chosen to accomodate for a dropped frame, and an additional 50% + // to account for drift in the scale of the camera and system time domains. + EXPECT_NEAR(sysTimeElapsedSecs, camTimeElapsedSecs, framePeriod * 1.5); + } + EXPECT_GT(cvtest::norm(frame, NORM_INF), 0) << "Complete black image has been received"; + + sysTimePrev = sysTimeCurr; + camTimePrev = camTimeCurr; } + int64 time1 = cv::getTickCount(); - printf("Processed %d frames on %.2f FPS\n", N, (N * cv::getTickFrequency()) / (time1 - time0 + 1)); + printf("Processed %d frames on %.2f FPS\n", N, (N * cvTickFreq) / (time1 - time0 + 1)); if (lastFrame) *lastFrame = frame.clone(); } diff --git a/modules/videoio/test/test_video_io.cpp b/modules/videoio/test/test_video_io.cpp index 3f5617d8ce..19fc32b53e 100644 --- a/modules/videoio/test/test_video_io.cpp +++ b/modules/videoio/test/test_video_io.cpp @@ -237,7 +237,7 @@ public: if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends())) throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref)); - if ((apiPref == CAP_MSMF) || ((apiPref == CAP_FFMPEG) && ((ext == "h264") || (ext == "h265")))) + if (((apiPref == CAP_FFMPEG) && ((ext == "h264") || (ext == "h265")))) throw 
SkipTestException(cv::String("Backend ") + cv::videoio_registry::getBackendName(apiPref) + cv::String(" does not support CAP_PROP_POS_MSEC option")); From b7a70fda790dc9728e07c424149301c4428a55d3 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Dec 2020 02:03:11 +0000 Subject: [PATCH 027/422] github(actions): manual trigger for arm64-build-checks.yml --- .github/workflows/arm64-build-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/arm64-build-checks.yml b/.github/workflows/arm64-build-checks.yml index f5988c7895..d3cf532d59 100644 --- a/.github/workflows/arm64-build-checks.yml +++ b/.github/workflows/arm64-build-checks.yml @@ -1,6 +1,6 @@ name: arm64 build checks -on: [pull_request] +on: workflow_dispatch jobs: build: From 43e58de9183ad2bf6084cca969090ac44f705df7 Mon Sep 17 00:00:00 2001 From: Yiming Li Date: Thu, 3 Dec 2020 15:35:52 +0800 Subject: [PATCH 028/422] fix: typo --- doc/tutorials/videoio/video-write/video_write.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorials/videoio/video-write/video_write.markdown b/doc/tutorials/videoio/video-write/video_write.markdown index b81107559e..29b6cf6f4e 100644 --- a/doc/tutorials/videoio/video-write/video_write.markdown +++ b/doc/tutorials/videoio/video-write/video_write.markdown @@ -109,7 +109,7 @@ const string NAME = source.substr(0, pAt) + argv[2][0] + ".avi"; // Form the n @code{.cpp} CV_FOURCC('P','I','M,'1') // this is an MPEG1 codec from the characters to integer @endcode - If you pass for this argument minus one than a window will pop up at runtime that contains all + If you pass for this argument minus one then a window will pop up at runtime that contains all the codec installed on your system and ask you to select the one to use: ![](images/videoCompressSelect.png) From 7f3ba5963d2d21d024c68f7decb21564365b027f Mon Sep 17 00:00:00 2001 From: Randall Britten Date: Wed, 2 Dec 2020 14:47:37 +1300 Subject: [PATCH 
029/422] Fixed minor typo "poins" in documentation page --- modules/calib3d/include/opencv2/calib3d.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 812c6be108..04b5e58e23 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -91,7 +91,7 @@ respectively) by the same factor. The joint rotation-translation matrix \f$[R|t]\f$ is the matrix product of a projective transformation and a homogeneous transformation. The 3-by-4 projective transformation maps 3D points -represented in camera coordinates to 2D poins in the image plane and represented in normalized +represented in camera coordinates to 2D points in the image plane and represented in normalized camera coordinates \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$: \f[Z_c \begin{bmatrix} From b31ce408ae088e13cfbf7b3306d21a6d14c01205 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Dec 2020 11:59:06 +0000 Subject: [PATCH 030/422] cmake: fix processing order of _bindings_generator - ensure that wrapped modules are already processed --- cmake/OpenCVModule.cmake | 21 ++++++++++----------- modules/python/bindings/CMakeLists.txt | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 10ee406032..38a1bb7a7f 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -98,15 +98,6 @@ macro(ocv_add_dependencies full_modname) endforeach() unset(__depsvar) - # hack for python - set(__python_idx) - list(FIND OPENCV_MODULE_${full_modname}_WRAPPERS "python" __python_idx) - if (NOT __python_idx EQUAL -1) - list(REMOVE_ITEM OPENCV_MODULE_${full_modname}_WRAPPERS "python") - list(APPEND OPENCV_MODULE_${full_modname}_WRAPPERS "python_bindings_generator" "python2" "python3") - endif() - unset(__python_idx) - ocv_list_unique(OPENCV_MODULE_${full_modname}_REQ_DEPS) 
ocv_list_unique(OPENCV_MODULE_${full_modname}_OPT_DEPS) ocv_list_unique(OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS) @@ -209,9 +200,17 @@ macro(ocv_add_module _name) set(OPENCV_MODULES_DISABLED_USER ${OPENCV_MODULES_DISABLED_USER} "${the_module}" CACHE INTERNAL "List of OpenCV modules explicitly disabled by user") endif() - # add reverse wrapper dependencies + # add reverse wrapper dependencies (BINDINDS) foreach (wrapper ${OPENCV_MODULE_${the_module}_WRAPPERS}) - ocv_add_dependencies(opencv_${wrapper} OPTIONAL ${the_module}) + if(wrapper STREQUAL "python") # hack for python (BINDINDS) + ocv_add_dependencies(opencv_python2 OPTIONAL ${the_module}) + ocv_add_dependencies(opencv_python3 OPTIONAL ${the_module}) + else() + ocv_add_dependencies(opencv_${wrapper} OPTIONAL ${the_module}) + endif() + if(DEFINED OPENCV_MODULE_opencv_${wrapper}_bindings_generator_CLASS) + ocv_add_dependencies(opencv_${wrapper}_bindings_generator OPTIONAL ${the_module}) + endif() endforeach() # stop processing of current file diff --git a/modules/python/bindings/CMakeLists.txt b/modules/python/bindings/CMakeLists.txt index 4ad3d0c8d9..0505f1f03f 100644 --- a/modules/python/bindings/CMakeLists.txt +++ b/modules/python/bindings/CMakeLists.txt @@ -11,7 +11,7 @@ set(PYTHON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../") # get list of modules to wrap set(OPENCV_PYTHON_MODULES) foreach(m ${OPENCV_MODULES_BUILD}) - if (";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";${MODULE_NAME};" AND HAVE_${m}) + if (";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";python;" AND HAVE_${m}) list(APPEND OPENCV_PYTHON_MODULES ${m}) #message(STATUS "\t${m}") endif() From 7e5c4fe1cdb597272e757cde435f2b56d39d1d1d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Nov 2020 03:41:21 +0000 Subject: [PATCH 031/422] cmake(js): update js targets - unconditional js bindings source code generation - use common name for tests: opencv_test_js --- cmake/platforms/OpenCV-Emscripten.cmake | 1 + modules/js/CMakeLists.txt | 82 
+++++++--------------- modules/js/common.cmake | 13 ++++ modules/js/generator/CMakeLists.txt | 74 +++++++++++++++++++ modules/js/{src => generator}/embindgen.py | 10 +-- modules/js/{src => generator}/templates.py | 0 modules/js/src/core_bindings.cpp | 2 +- platforms/js/build_js.py | 2 - 8 files changed, 122 insertions(+), 62 deletions(-) create mode 100644 cmake/platforms/OpenCV-Emscripten.cmake create mode 100644 modules/js/common.cmake create mode 100644 modules/js/generator/CMakeLists.txt rename modules/js/{src => generator}/embindgen.py (99%) rename modules/js/{src => generator}/templates.py (100%) diff --git a/cmake/platforms/OpenCV-Emscripten.cmake b/cmake/platforms/OpenCV-Emscripten.cmake new file mode 100644 index 0000000000..ec15fba799 --- /dev/null +++ b/cmake/platforms/OpenCV-Emscripten.cmake @@ -0,0 +1 @@ +set(OPENCV_SKIP_LINK_AS_NEEDED 1) diff --git a/modules/js/CMakeLists.txt b/modules/js/CMakeLists.txt index f3a625b37e..d82e4a26f6 100644 --- a/modules/js/CMakeLists.txt +++ b/modules/js/CMakeLists.txt @@ -1,13 +1,19 @@ # ---------------------------------------------------------------------------- # CMake file for js support # ---------------------------------------------------------------------------- -set(the_description "The js bindings") - -if(NOT BUILD_opencv_js) # should be enabled explicitly (by build_js.py script) - ocv_module_disable(js) +if(OPENCV_INITIAL_PASS) + # generator for Objective-C source code and documentation signatures + add_subdirectory(generator) endif() +if(NOT BUILD_opencv_js) # should be enabled explicitly (by build_js.py script) + return() +endif() + +set(the_description "The JavaScript(JS) bindings") + set(OPENCV_JS "opencv.js") +set(JS_HELPER "${CMAKE_CURRENT_SOURCE_DIR}/src/helpers.js") find_path(EMSCRIPTEN_INCLUDE_DIR emscripten/bind.h @@ -28,59 +34,18 @@ if(NOT EMSCRIPTEN_INCLUDE_DIR OR NOT PYTHON_DEFAULT_AVAILABLE) ocv_module_disable(js) endif() -ocv_add_module(js BINDINGS) +ocv_add_module(js BINDINGS PRIVATE_REQUIRED 
opencv_js_bindings_generator) ocv_module_include_directories(${EMSCRIPTEN_INCLUDE_DIR}) -# get list of modules to wrap -# message(STATUS "Wrapped in js:") -set(OPENCV_JS_MODULES) -foreach(m ${OPENCV_MODULES_BUILD}) - if(";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";js;" AND HAVE_${m}) - list(APPEND OPENCV_JS_MODULES ${m}) - # message(STATUS "\t${m}") - endif() -endforeach() - -set(opencv_hdrs "") -foreach(m ${OPENCV_JS_MODULES}) - list(APPEND opencv_hdrs ${OPENCV_MODULE_${m}_HEADERS}) -endforeach(m) - -# header blacklist -ocv_list_filterout(opencv_hdrs "modules/.*.h$") -ocv_list_filterout(opencv_hdrs "modules/core/.*/cuda") -ocv_list_filterout(opencv_hdrs "modules/core/.*/opencl") -ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/opengl.hpp") -ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/ocl.hpp") -ocv_list_filterout(opencv_hdrs "modules/cuda.*") -ocv_list_filterout(opencv_hdrs "modules/cudev") -ocv_list_filterout(opencv_hdrs "modules/core/.*/hal/") -ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker.hpp") # Conditional compilation -ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/utils/.*") - -file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${opencv_hdrs}") - -set(bindings_cpp "${CMAKE_CURRENT_BINARY_DIR}/bindings.cpp") - -set(scripts_hdr_parser "${CMAKE_CURRENT_SOURCE_DIR}/../python/src2/hdr_parser.py") - -set(JS_HELPER "${CMAKE_CURRENT_SOURCE_DIR}/src/helpers.js") - -add_custom_command( - OUTPUT ${bindings_cpp} - COMMAND ${PYTHON_DEFAULT_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py" ${scripts_hdr_parser} ${bindings_cpp} "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${CMAKE_CURRENT_SOURCE_DIR}/src/core_bindings.cpp" - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/core_bindings.cpp - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/embindgen.py - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/templates.py - DEPENDS ${scripts_hdr_parser} - #(not needed - generated by CMake) DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/headers.txt - DEPENDS ${opencv_hdrs} - DEPENDS ${JS_HELPER}) - add_definitions("-std=c++11") -link_libraries(${OPENCV_MODULE_${the_module}_DEPS}) +set(deps ${OPENCV_MODULE_${the_module}_DEPS}) +list(REMOVE_ITEM deps opencv_js_bindings_generator) # don't add dummy module +link_libraries(${deps}) + +set(bindings_cpp "${OPENCV_JS_BINDINGS_DIR}/gen/bindings.cpp") +set_source_files_properties(${bindings_cpp} PROPERTIES GENERATED TRUE) OCV_OPTION(BUILD_WASM_INTRIN_TESTS "Build WASM intrin tests" OFF ) if(BUILD_WASM_INTRIN_TESTS) @@ -94,12 +59,17 @@ else() ocv_add_executable(${the_module} ${bindings_cpp}) endif() +add_dependencies(${the_module} gen_opencv_js_source) + set_target_properties(${the_module} PROPERTIES COMPILE_FLAGS "-Wno-missing-prototypes") +#set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s NODERAWFS=0") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} --memory-init-file 0 -s TOTAL_MEMORY=128MB -s WASM_MEM_MAX=1GB -s ALLOW_MEMORY_GROWTH=1") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s MODULARIZE=1 -s SINGLE_FILE=1") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s EXPORT_NAME=\"'cv'\" -s DEMANGLE_SUPPORT=1") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s FORCE_FILESYSTEM=1 --use-preload-plugins --bind --post-js ${JS_HELPER} -Wno-missing-prototypes") +#set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} --use-preload-plugins --bind --post-js ${JS_HELPER} -Wno-missing-prototypes") +#set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s FORCE_FILESYSTEM=1 --bind --post-js ${JS_HELPER} -Wno-missing-prototypes") set_target_properties(${the_module} PROPERTIES LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS}") # add UMD wrapper @@ -150,7 +120,7 @@ add_custom_command(OUTPUT "${opencv_test_js_bin_dir}/${test_data}" ) list(APPEND opencv_test_js_file_deps "${test_data_path}" "${opencv_test_js_bin_dir}/${test_data}") -add_custom_target(${PROJECT_NAME}_test ALL +add_custom_target(${PROJECT_NAME}_test DEPENDS 
${OCV_JS_PATH} ${opencv_test_js_file_deps}) # perf @@ -173,7 +143,7 @@ foreach(f ${perf_files}) list(APPEND opencv_perf_js_file_deps "${perf_dir}/${f}" "${opencv_perf_js_bin_dir}/${f}") endforeach() -add_custom_target(${PROJECT_NAME}_perf ALL +add_custom_target(${PROJECT_NAME}_perf DEPENDS ${OCV_JS_PATH} ${opencv_perf_js_file_deps}) #loader @@ -193,4 +163,6 @@ add_custom_command( list(APPEND opencv_loader_js_file_deps "${loader_dir}/loader.js" "${opencv_loader_js_bin_dir}/loader.js") add_custom_target(${PROJECT_NAME}_loader ALL - DEPENDS ${OCV_JS_PATH} ${opencv_loader_js_file_deps}) \ No newline at end of file + DEPENDS ${OCV_JS_PATH} ${opencv_loader_js_file_deps}) + +add_custom_target(opencv_test_js ALL DEPENDS opencv_js_test opencv_js_perf opencv_js_loader) diff --git a/modules/js/common.cmake b/modules/js/common.cmake new file mode 100644 index 0000000000..192bcca4ea --- /dev/null +++ b/modules/js/common.cmake @@ -0,0 +1,13 @@ +# get list of modules to wrap +if(HAVE_opencv_js) + message(STATUS "Wrapped in JavaScript(js):") +endif() +set(OPENCV_JS_MODULES "") +foreach(m ${OPENCV_MODULES_BUILD}) + if(";${OPENCV_MODULE_${m}_WRAPPERS};" MATCHES ";js;" AND HAVE_${m}) + list(APPEND OPENCV_JS_MODULES ${m}) + if(HAVE_opencv_js) + message(STATUS " ${m}") + endif() + endif() +endforeach() diff --git a/modules/js/generator/CMakeLists.txt b/modules/js/generator/CMakeLists.txt new file mode 100644 index 0000000000..75c8a03545 --- /dev/null +++ b/modules/js/generator/CMakeLists.txt @@ -0,0 +1,74 @@ +set(MODULE_NAME "js_bindings_generator") +set(OPENCV_MODULE_IS_PART_OF_WORLD FALSE) +ocv_add_module(${MODULE_NAME} INTERNAL) + +set(OPENCV_JS_BINDINGS_DIR "${CMAKE_CURRENT_BINARY_DIR}" CACHE INTERNAL "") +file(REMOVE_RECURSE "${OPENCV_JS_BINDINGS_DIR}/gen") +file(MAKE_DIRECTORY "${OPENCV_JS_BINDINGS_DIR}/gen") +file(REMOVE "${OPENCV_DEPHELPER}/gen_opencv_js_source") # force re-run after CMake + +# This file is included from a subdirectory +set(JS_SOURCE_DIR 
"${CMAKE_CURRENT_SOURCE_DIR}/..") +include(${JS_SOURCE_DIR}/common.cmake) # fill OPENCV_JS_MODULES + +set(opencv_hdrs "") +foreach(m ${OPENCV_JS_MODULES}) + list(APPEND opencv_hdrs ${OPENCV_MODULE_${m}_HEADERS}) +endforeach(m) + +# header blacklist +ocv_list_filterout(opencv_hdrs "modules/.*.h$") +ocv_list_filterout(opencv_hdrs "modules/core/.*/cuda") +ocv_list_filterout(opencv_hdrs "modules/core/.*/opencl") +ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/opengl.hpp") +ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/ocl.hpp") +ocv_list_filterout(opencv_hdrs "modules/cuda.*") +ocv_list_filterout(opencv_hdrs "modules/cudev") +ocv_list_filterout(opencv_hdrs "modules/core/.*/hal/") +ocv_list_filterout(opencv_hdrs "modules/.*/detection_based_tracker.hpp") # Conditional compilation +ocv_list_filterout(opencv_hdrs "modules/core/include/opencv2/core/utils/.*") + +ocv_update_file("${CMAKE_CURRENT_BINARY_DIR}/headers.txt" "${opencv_hdrs}") + +set(bindings_cpp "${OPENCV_JS_BINDINGS_DIR}/gen/bindings.cpp") + +set(scripts_hdr_parser "${JS_SOURCE_DIR}/../python/src2/hdr_parser.py") + +if(DEFINED ENV{OPENCV_JS_WHITELIST}) + set(OPENCV_JS_WHITELIST_FILE "$ENV{OPENCV_JS_WHITELIST}") +else() + set(OPENCV_JS_WHITELIST_FILE "${OpenCV_SOURCE_DIR}/platforms/js/opencv_js.config.py") +endif() + +add_custom_command( + OUTPUT ${bindings_cpp} "${OPENCV_DEPHELPER}/gen_opencv_js_source" + COMMAND + ${PYTHON_DEFAULT_EXECUTABLE} + "${CMAKE_CURRENT_SOURCE_DIR}/embindgen.py" + "${scripts_hdr_parser}" + "${bindings_cpp}" + "${CMAKE_CURRENT_BINARY_DIR}/headers.txt" + "${JS_SOURCE_DIR}/src/core_bindings.cpp" + "${OPENCV_JS_WHITELIST_FILE}" + COMMAND + ${CMAKE_COMMAND} -E touch "${OPENCV_DEPHELPER}/gen_opencv_js_source" + WORKING_DIRECTORY + "${CMAKE_CURRENT_BINARY_DIR}/gen" + DEPENDS + ${JS_SOURCE_DIR}/src/core_bindings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/embindgen.py + ${CMAKE_CURRENT_SOURCE_DIR}/templates.py + ${scripts_hdr_parser} + #(not needed - generated by 
CMake) ${CMAKE_CURRENT_BINARY_DIR}/headers.txt + ${opencv_hdrs} + COMMENT "Generate source files for JavaScript bindings" +) + +add_custom_target(gen_opencv_js_source + # excluded from all: ALL + DEPENDS ${bindings_cpp} "${OPENCV_DEPHELPER}/gen_opencv_js_source" + SOURCES + ${JS_SOURCE_DIR}/src/core_bindings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/embindgen.py + ${CMAKE_CURRENT_SOURCE_DIR}/templates.py +) diff --git a/modules/js/src/embindgen.py b/modules/js/generator/embindgen.py similarity index 99% rename from modules/js/src/embindgen.py rename to modules/js/generator/embindgen.py index 0ec4488946..6e2bac71a1 100644 --- a/modules/js/src/embindgen.py +++ b/modules/js/generator/embindgen.py @@ -104,8 +104,6 @@ def makeWhiteList(module_list): return wl white_list = None -exec(open(os.environ["OPENCV_JS_WHITELIST"]).read()) -assert(white_list) # Features to be exported export_enums = False @@ -891,10 +889,10 @@ class JSWrapperGenerator(object): if __name__ == "__main__": - if len(sys.argv) < 4: + if len(sys.argv) < 5: print("Usage:\n", \ os.path.basename(sys.argv[0]), \ - " ") + " ") print("Current args are: ", ", ".join(["'"+a+"'" for a in sys.argv])) exit(0) @@ -908,5 +906,9 @@ if __name__ == "__main__": bindingsCpp = sys.argv[2] headers = open(sys.argv[3], 'r').read().split(';') coreBindings = sys.argv[4] + whiteListFile = sys.argv[5] + exec(open(whiteListFile).read()) + assert(white_list) + generator = JSWrapperGenerator() generator.gen(bindingsCpp, headers, coreBindings) diff --git a/modules/js/src/templates.py b/modules/js/generator/templates.py similarity index 100% rename from modules/js/src/templates.py rename to modules/js/generator/templates.py diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp index ceeb641c7c..a43fb726de 100644 --- a/modules/js/src/core_bindings.cpp +++ b/modules/js/src/core_bindings.cpp @@ -87,7 +87,7 @@ namespace hal { using namespace emscripten; using namespace cv; #ifdef HAVE_OPENCV_DNN -using namespace dnn; 
+using namespace cv::dnn; #endif namespace binding_utils diff --git a/platforms/js/build_js.py b/platforms/js/build_js.py index 38e988a3bd..cd22db0f02 100644 --- a/platforms/js/build_js.py +++ b/platforms/js/build_js.py @@ -129,11 +129,9 @@ class Builder: "-DBUILD_opencv_superres=OFF", "-DBUILD_opencv_stitching=OFF", "-DBUILD_opencv_java=OFF", - "-DBUILD_opencv_java_bindings_generator=OFF", "-DBUILD_opencv_js=ON", "-DBUILD_opencv_python2=OFF", "-DBUILD_opencv_python3=OFF", - "-DBUILD_opencv_python_bindings_generator=OFF", "-DBUILD_EXAMPLES=OFF", "-DBUILD_PACKAGE=OFF", "-DBUILD_TESTS=OFF", From 22d64ae08fbf75162e0b6b0ca031be736343d92c Mon Sep 17 00:00:00 2001 From: Wenqing Zhang Date: Fri, 4 Dec 2020 02:47:40 +0800 Subject: [PATCH 032/422] Merge pull request #17570 from HannibalAPE:text_det_recog_demo [GSoC] High Level API and Samples for Scene Text Detection and Recognition * APIs and samples for scene text detection and recognition * update APIs and tutorial for Text Detection and Recognition * API updates: (1) put decodeType into struct Voc (2) optimize the post-processing of DB * sample update: (1) add transformation into scene_text_spotting.cpp (2) modify text_detection.cpp with API update * update tutorial * simplify text recognition API update tutorial * update impl usage in recognize() and detect() * dnn: refactoring public API of TextRecognitionModel/TextDetectionModel * update provided models update opencv.bib * dnn: adjust text rectangle angle * remove points ordering operation in model.cpp * update gts of DB test in test_model.cpp * dnn: ensure to keep text rectangle angle - avoid 90/180 degree turns * dnn(text): use quadrangle result in TextDetectionModel API * dnn: update Text Detection API (1) keep points' order consistent with (bl, tl, tr, br) in unclip (2) update contourScore with boundingRect --- doc/opencv.bib | 23 + doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown | 3 +- .../dnn/dnn_text_spotting/detect_test1.jpg | Bin 0 -> 41415 bytes 
.../dnn/dnn_text_spotting/detect_test2.jpg | Bin 0 -> 92092 bytes .../dnn_text_spotting.markdown | 316 +++++++ .../text_det_test_results.jpg | Bin 0 -> 49278 bytes .../dnn/dnn_text_spotting/text_rec_test.png | Bin 0 -> 2911 bytes .../dnn/table_of_content_dnn.markdown | 12 +- modules/dnn/include/opencv2/dnn/dnn.hpp | 249 ++++++ modules/dnn/src/model.cpp | 780 +++++++++++++++++- modules/dnn/test/test_common.hpp | 8 + modules/dnn/test/test_common.impl.hpp | 46 ++ modules/dnn/test/test_model.cpp | 220 +++++ samples/data/alphabet_36.txt | 36 + samples/data/alphabet_94.txt | 94 +++ samples/dnn/scene_text_detection.cpp | 151 ++++ samples/dnn/scene_text_recognition.cpp | 144 ++++ samples/dnn/scene_text_spotting.cpp | 169 ++++ samples/dnn/text_detection.cpp | 269 +++--- 19 files changed, 2339 insertions(+), 181 deletions(-) create mode 100644 doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg create mode 100644 doc/tutorials/dnn/dnn_text_spotting/detect_test2.jpg create mode 100644 doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown create mode 100644 doc/tutorials/dnn/dnn_text_spotting/text_det_test_results.jpg create mode 100644 doc/tutorials/dnn/dnn_text_spotting/text_rec_test.png create mode 100644 samples/data/alphabet_36.txt create mode 100644 samples/data/alphabet_94.txt create mode 100644 samples/dnn/scene_text_detection.cpp create mode 100644 samples/dnn/scene_text_recognition.cpp create mode 100644 samples/dnn/scene_text_spotting.cpp diff --git a/doc/opencv.bib b/doc/opencv.bib index 54396d6a10..6212ea5a55 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -1261,3 +1261,26 @@ pages={281--305}, year={1987} } +@inproceedings{liao2020real, + author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, + title={Real-time Scene Text Detection with Differentiable Binarization}, + booktitle={Proc. 
AAAI}, + year={2020} +} +@article{shi2016end, + title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition}, + author={Shi, Baoguang and Bai, Xiang and Yao, Cong}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={39}, + number={11}, + pages={2298--2304}, + year={2016}, + publisher={IEEE} +} +@inproceedings{zhou2017east, + title={East: an efficient and accurate scene text detector}, + author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun}, + booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition}, + pages={5551--5560}, + year={2017} +} diff --git a/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown b/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown index 43c86acaf0..ddf40c96a0 100644 --- a/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown +++ b/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown @@ -1,6 +1,7 @@ # How to run custom OCR model {#tutorial_dnn_OCR} @prev_tutorial{tutorial_dnn_custom_layers} +@next_tutorial{tutorial_dnn_text_spotting} ## Introduction @@ -43,4 +44,4 @@ The input of text recognition model is the output of the text detection model, w DenseNet_CTC has the smallest parameters and best FPS, and it is suitable for edge devices, which are very sensitive to the cost of calculation. If you have limited computing resources and want to achieve better accuracy, VGG_CTC is a good choice. -CRNN_VGG_BiLSTM_CTC is suitable for scenarios that require high recognition accuracy. \ No newline at end of file +CRNN_VGG_BiLSTM_CTC is suitable for scenarios that require high recognition accuracy. 
diff --git a/doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg b/doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b154dfc4ec400192101853e1b9fbda0af52139b7 GIT binary patch literal 41415 zcmc%wg+|1qzC?OqM$(lFo2EpH~>B} z5CDWoPXLetWB>^=kOFi74M2tTG{_VWzz8t^U*G{0f#(1}GW@?-5s(Db014nFGN>Wb zo`4acf%HbmR0QA#I3Pm=Pyi$%eE~28bR)wf@Q9*K`2TMP{(S%l!KgOq5L6T<07Qs_ zN{I4r4B1`)73F^p`2RT+5Goov1}6C5X8`=43<2OfB_FZq7|UV6aUB#TpQR=t)U`6t z#5S?PDQ@e`Y|B+vgVV`X1vNku3Z_`+EsK<%yXjphxV;URR&I{A!yt->`m(F2rsd$I zk5j%)oM6H0=>$=1$wjHdS1wkS|a}_`&z3FXqb=)UD9in@@6lUL@dy%&%LGSB*+X@7lj&Mpl8EC z)Qg2GWi39pH!tJex^(yYrwxFsq8Y{FT=EfW!!Xxb%Sw45K%uQ|Sz$9B$7v~2mrG%f zm@6RB4GL=MY5D9LZ#m^_CEQkd}_xDZp&W6lHydVG~`4Fwh!H`EG>CSbdD>@PEa+6+m zkz}uugSXsWO(V*mGs)c8IN-nLzNX&i-_Q<4fr0@DuS~u)LGICwO#CDK56>kGut8@5 zLF5wEY8W7KbAbmMvySoZ`sK90Lo@AEGE-p=6Esg7mF=cx@MTMHbNI>@Dtm8*tt%8@ z002aG!raY6jjW|rdHog~ML0@NLq?Sk!X%ruK+|XLn==u7ebo1D((1BWogqg@$HY0E z=F7JwTI?(H<=4$_cmC7ZHo`h$)JS2;tV%=#m5!!DuX>hm#RFv~#u#~3NX9-u310ye zBx98v2ri3jKkMq1fLggE&SY$LF42b{He&WjraIvpTkCxIP-;hXk_jX!B?~F<;_-s0 zv*X8z1V`jlREt(nm>nBlqKJyeZi;4OR4BNm{y_raLAExI5DND29oOp3y~kVA;wyF5Rtp2J{WEQveI7 z@nDX$RGX2U_Whf%ZLC7-<%PR&y3z%Zi$jelr%KFARmSKX6=$b3m3oOElZB3DW%29E z%8KbDG8iT|*M|)h$4ftVECb<kro6eiIav;&4m`sz zRH>#3>HQI%f@&$a8Gb*&Gbu9+D6)XC05{p>IE~5tgt@&W+0aM2{MyYnFu;fIje zzHA>YE-rCIeSI>d_4}eOeF8P_0|#0(ny7DVmXYveximrf$iyz5c(}IukQ=57JCHc- zz_^vc3bND3BZ!CHZ?b3EJ;U`qoWi==XbJTvlplB|(c7N7%Q0|!e0;Lw=~bvg0g_>o zW#q$BBgXbD@Ou^VMPdr|`05t^I{;{pCt>oP*SQ7h)Mlv{s4dxZ>{EIv)OWp28ZacIZ@wY%IJH26*w9NSKXiNG**XuH8|=303#ZV zft5)W0vyr35T8GcCf?~;ybbNaCX~VJJa)xSVo}p&(gT$XHx1V3>Ej4-Iu~b`dRAqJ z&upi0TH-mUQ^;CM(n`{bA78Zj57&SInU^X$u|$FiNCYZ9+7ltPEX((fw5$P!4TSY7 zqtRH@w7@8=2R%XWD>Y`7Ue%Z9V|Q?F%4=Tu>jkFaUQbfIzVr9LYH2ByufV_qK!7So zmI4Sfft;XpG$(O$Xz}vm7|Vc2m`SZ}oehe@61$|Toz>E)w4{?W!B^rqh4y@<$m_}z zd&w$|FqP_Raqw{^548>uoB&hA60^_BfGwUR=)Bn+^4E4wg8bLPGWc?G@xvN6yz*Kk zj3MCF6_@tHXuWBBDK8g;b79!PZBSJx`g=w9!A*CM3a=;z4-C9Sw4_`nhyafkz4ZLy 
zEFS!i)Zc*-29z0AFvc)z&gTl5PYPXMx2k$p?Ns&NP4h1oN0Tx%pT4L%Gc!9TwXc4? zX~|hCcuW+JKTLt^!lI@MkV#lY7LFwA{T`6|`!^Yo$w?F@huFS3%H$*?rc0U&XP#b{S8~nE%br)E?QdfoZhnMVc@qdmV+b-(pWF_3TermE zzmT4}>8%OaJbAR-;sG>Kn6LndCI{MZ9;Y1eU48ReI4xQfA0c7BG><4t0bYEodQa%7 z%f$fQ<3&sQw7rGYvy@4e)k@QG787D#)t>sR-=TijAy&@hTmq5mOSj@estf=GOhrt@ zV)yDB;DQ$R_nz4NqHDGmTUX+j-7T&#yW6juq5Rht+azP z%IU(KqhhkQ873Mw5g|k4W-ElIr9OQ7=jR>F`4Am(i}%*eCxN+;s)p@n*>g7`LT61C z=8y1)r-d}3v-9)QPPGbKE!l-l4BkpmhKyirelq?es4r-SL5*aBSU*7+Et@lmBR7=F za=U2DG}Jv5k?_^2(aMTDyw$jxht6XHH>$qg+x9THr11h+ zkW*nw@#9190M9?5;NUjIMoj31(jE5c%1lx4*9`o!_~g#!?y7HGT})li=3HIQn#zhU z8WS`0(Z2T2D{5ea6acbkp;jfPH6h{u0M(=GpC&E`Y0p`cLCWZ&gyn35Ylx7DdSTnm z<}(G&fU8ix>^Fj!9g4h|Et}_?C^^D~&yS!x9&gW<{i+j5-ZC@Ke@)Tkztn5ckf(6< zNu-l*y?l7l>93ibI-~fKCHkAM!on}-FW>uH)ypuC{Vm&t;oEmq2p+TUMNzKF9z!eG zhZ$pGko%G2$zpc181CKiD?Sbq=cxl5P~I4MZXCUnXFL^5 z?MnbF7EMSv0U~7U%X{(rpp7_7_u6PvrAyo@0hYl8k#3%IuDX-%qv5erQrE?t?es^N z2~0ev{&I;t5pPRwrI2B!Mg5|ef{#ONkFZ)1qg+N3LHP`5G9y|7_I-r4<&4VJhkF$xOjwpPYh~x&)RKa1K)cJJvI`OO*cP; z9Ccg!#iny1B0a^bVPgA_o0#B;rixr@NbozR;^pebdOrzK(T*{)^${uSh5L{{tL8-@ zm^ntYKI5>n3a2V@IrW#lAFo5SwC(;TxmLTp1NmoVd5htsd(bxHvB{eY(bbpclS&oM z$cNvQK9=YTU=f##5OCES4rv=(D;C`NT;JHic+=juo`sc)wWKi!%7(=0R1g)<@o~oD zE4=5u|Epi+Jb!ZWGaSE&l~^@f5rR*Jw*O3+fFjf~yVlj>83t9rp=--8ZvW%;)Eg~X zu8{X3lcBc(w|AZ`nfWiI>7R*Y&q+$X$-JYL*b=EYNKr5#V?&FDnf8{71r3}$42Eh? 
z-1YXqP>#v@iKty&!Y=TS|H7Bchq{EJq~v4Hi*2@N(QjO7?mh2mDXvc9>?@i}V}Tv8 zS>M=LZ^g<9S8BE(|u+%IZxcC?CO<^`BsN?zV< z*0ghcXT$E+x(WL8SmN>M^56FsU zR8Tl5%aBTsy*?bj9k;rV>be+kPGlvD_MLwe3l&~!0^TI;|G>X27e5bv3?d)c_y0S0 zU$T?&^ybAadH%2Z<4B+5&~l*%?avXZ5uW#S<@?sIcRR6XzXKv@w|>*d{L;RC{e*KA z;^Vg$cpGfB6%c0KvvZl^oI9_lzPckR*|9weyigzbTlwhwB=C*-&g#u9?|t}R>y7&-`>)cq zV*dcpACdD&{zvcn2kj4$!H?Dx5^r{1{dNDFd@GIny`NZMY7L%U`dI$rxLkL^)ik>( zt`fGTp+GED-9s!op|Qk_`=haCDqDj1wRg;?O%suA(bXa&oli%IR;Nc-e_ZdehapbM zZ7bEsA?8n?x$dQ%A7`aY`MiRNno&r__?Tr-W%rP&kys{w-TSf zKDJt$Y4MLsY^^K_%y2q%i{=s<6~0`=>eX|OdaadfE?@QY^UYD-SG&Z)vQo<)Ux9;= zCxRevYV1b@@un}`KOpUK^2F?Ea=K&R?~nTDhn?uha`_{^2xD_a#YRZ5{EG5U^*4(Pu~i2nGIOv%{C=1)%hpSWf@pB9p;Io6=~1>!K5$SU!Ive_#LTk-e1Se zQdILTL*|23vPCb6J^xHFurZT+`qQZUgc)c1_JssrqnbgfCH$13HS5RKiP)fh{>R@| zFK?l6xQvh%XObaG6Ooah!?Y>CrQ>y^^&atL-w(v%>lbpi8X6zJc>;Juo!yKIS|Jm4 z_2A=YH0oPD;YPjg&Mk+gvBM^{0ijg9^z%)Ppbzo7-Wd zE{jcv$pwk}k}4(U)j zL`1wj;J$M;^$hWAhJ6`3pi2kE_iaxC3oVkjqY6RBpgY;pg$M&`{@+utzacmmm(|{r(s;mSrC-VF}`0X zu}%N!Y2wS%!#w7~ANx!l3}1{Ob4$tzH#X7>IrK6g5a%zGD!$Q|o0eMXvk&)a_o6?K z1PL$Uh&qa>Wf}-wm#*?ju`H00Ax`6 z`1|)i__zY8LXSIDsjy}&LevyXs^!!w{v^Xt{6qx%$G+;gLPv^&?14_P`=h1KjUQ+5 z&2W24vKN}1m7asQh_na%Ry%u`gm)_faerdi$9l{vqHdw4uO*!9L+wZ)Ql^}iKXAPj z_9er%Y{&KNB)WToxTx?&xQTO#z3B6{J~E$luhaT8Ey@GFOtaI#{yQnWTr{=-DVPoodkql#nX!3oefDmiu*a5 z(_~z-imYr!7$3c>H~K>!A<}Uh$$U#YuE}32#N_zAKjq-Hg zows;&rKY@_xA^>8?AH)Zd@0=hwQ}yARKo1*fggR@E=yDky;XIF95YoFg&+4G%G4(F9zTJQoxy<0_OhYWlI z1*+>kLj4bqLfy-Sz17k$=dGdFYud(&C(Gq;?&#`gL+Rz``bx_TbI;_9=aU7@4E(MT z?nnJYE!R?!8)aS;Wf9Jiwzp!Lgl5bJTJA+|FJ#`%KjYitay+Ze`lJ>7E9_Uzq|<{y zXj!|vRq4z7Mfa5#Ki*!o7dtl!U@vGZ~vI+zE@YcD8+Ef zC48kAvLm{AYc09=yZ^my?EaW>_0R263P|n}`wW<{THmx%#-#%x8FbQFG0!)7)TX0X zZzEn^(-BeT7j}1cyuUmo-aOB-Dp0K^_PqG3MAKDIhiBlXHBMwpYvIXb*e&*vhv1UX z>|{O7>UZaxl1S6Z8}ic|AIJ1ypZZAW^B~HXx`CB%Q$bAyhU~yI3AzDG;l`H;u6GZ< zHe9sAANug{JBoX_#a;(jw{6x3Ok3D{UHZ-+C4Bw<0uI;gTwA>9f8l-d5XK#TrR0Ce zA+|8>_oBFVP#P!yxcj?=wV$;#P5Wedvhr2kye zgx`ZbUzggXoRH(m8ltPEtl{geW~i|KvW0%AfsMzf5IW^3;lQ7_)>`+QH(}QDSBM0} 
z&iUP56O7Yujn+E<;O73f=+i$S<@B+i!k(tr*N5TiaeMoBNu!5L>W>;Vlv*+Vj5yK# zdH<`r?xh*7jt9YI7bU2v#srvdFbkUHm61r5-$&Cj_fdY^jCJ4 zI?mSkxQzJBM02?d`-0vwna&62-GweO`ObX%@&&&E50Cqb##@u?Vo>X-do!eGht}Ms zx2|I_1;G;`Mew3=au0LJy0**TKft}m{p)rAU$cSBGvvPG@$H|8!EIcDy)yQ1lk;CQ zgde@iS_a%LSAEz-zXjCX2vo{go~UFAq;Ap{p()s3*JhYG_Q8>w`XrBHLkJ zfsf+v`}VsL;@t?P`y=t26mkMSLW?pf6*|r&A08YX z+`Q+Lp&e4X`x&$uZ1LU5;+K=P(Uw@qKBIl6*-5p5W1ddSr7~|F`E+kHq?FmRYppe+ z_~3_VdTHCSSYI&y`uSQt^un;N*mu{@R{*@bUn0r^>beydf9w5iwFU!I*px8U`j@D|=K>l_3GV5H4 z@55hjHWxtX?sfO(r`y(gi}oq5DrejougmU_^Gq~b-AumIYAc^W4v5JdeQe`q&mm)N zt(D5XUq&we{y$EZX^nhCi9%^DCGJ|uSpppwPlC6}$?2I&zRLiHR=@pEIy`TPClPsi z0E*?n=Zno#&sL{fzpshY`8+|IeO2>nAG|tqSmfr&HD^s> z_XTre!ux0GrNX<%JJQ-6k$#uycI4h>hEmB;a$^GV!Ga#!2a73%jZLhiiyH4+#CsQoW}@9xM~)>g{i-&`T~ zgZq6F*eC_F)c!|W3=V&sO5TLS?v{pUqtxRF%~bTJ1+v2NpD+2%kcOiA8_Wl9owS9i zY{Ej%TK-6iU(~Lv3L&na`kLMAUD9X8x{vpc&(jD0^oi1*jcgze|2w zmMGy1#6$nw^XkTgme%@+us{0M^;TITAHUv;_Fqa#NwhH+7gKQXcP$9`6+DWxh7;x- zyS{d>#tzc1cNX1?r{+;F<#GyXGW?`ena5J(#>;MMUut+oe$8;&YAJHEQ(tul$BtGC zks_B|(^?O(-)#EvNZYrg1iyZi49lJydPdG8@g?jbPg#mF-hQ+0vP@J`>>=ET$h}^v zbGI39F7%4aH)&kM6n$BZe^SV=xu+Fh?Yw)KOqa77CiuXY``T!WR{RRNu!vuEOvt@? 
zx~dThmz2;yx%NIOl?n*=>bG)VE6y%23h^noE>9QjNaCAzx4d1wIV!rCthYP!>|R~H z59f=EPZpSDA6Gc;YM=f3Dn{_~9AC{?NV6^V(5mp_*__UnC$clB&uWj)Y+Q^mvu_GL zsRp{2Q@RFIPNxFGwFU`Fm$!~?9P`D?<_1HJ>_4jG&Rt&YM;+&Pmr~t+xp$pDv!_@x z>c7DdNan*H=z5>ZKL>QJVp*KCH)7@xnk$an)RNczz8ccHH##}KNTv4K4OZEagp2MN zQHS!al;z(YrR)VhZaY!zr8gdjX=yxM&#Cy|?iGKwbes?R>GyM#Eba}=&z)jNlfypU zWRG%O@@}W9noD59NqMkCbo?-Cz2s2Cb!|8R5jF}J2=ycYKK3H1)Xdc(~WlHM`a=bHWN|zhi0qr zYiwDQg5Fr2Kmjh{5YR0>Xv8Rt%Zk%iirs?a2~{^>8CIfAO7lxyGl~- z1bF79d^)?8XnpqP_;vr&`$nssiu=d@R*{5!^wT}W+EKF`@4;j>p5#A(=i|~>=kjvN zyHP>ew>}VNE~=fzsx>Z!nK+poyQ1?lUo697Uqw+X%ZoZVQgV+g3kHp}ydV;R>F8X=qEBKL-)o(xefh-`v z|GW3x$;GYZleEyx%5C4nOPxP28qOw*yY?cC#4i@y6SxlYw3iRQL2>;Kqpzo(9N<0i zM#MOZwS`=jr)UZq`vt`n1^yQ^NhM3^>c>0v;ljst<~?%nfRcdC@SK6|kz62F?ylsAqaXnxt}e6#EQfSdE-3mN)1y|yJM zepDs<9Ij7992|ukhVuH99I3)+^uFH$NUjMIO zcQ!YRi=UNkSxV(Q?$1%#z#rqDmj!M;h^h_qBXAP+i&OWVMOjLpg;X&Mk(q}a<&1H? zCGa?4db?TW8z-u@W$xY5DIQ3+)yYZmEyuge>z4$+W1XZ>cd2IDUh&c;#XK}aCxq3PG`w=JEuCI6ILzufqK6T0>5Y$GAR_RRcjd2MUw*#FkuvETV6@~}81AqCg> zhy_tHiR(9Im|vk_PsyMZo$8a>V>&DTj6EP5-hK6Q{`zv$-VC>$RPAmuJzd zM4=F0%V09W&t~%SXoj1%vqmMg=JdeOxJtn(zqWDN+)RuXWHZgVZu+{RZV7CqjBxT! 
zKij+$=NZkG?CXAvd_KS9^+tXLX+v;vrDj+7HdbRV;aT}%@RP)I_DS$4*X%|UTk5_< zHl?F;x~S#4N@c@5sz&^6kwBrO_<&a_*_JTWQP-nSa0z1*P2(4xu#@;;xqIWr$wVLT zMCc!H^Qp<|g`bj<@~xUKKoJ^?QrOeBclOuszCIHcKjH~<#G4uDDEB#6&@m&i8MhSb%Yl{J@XQ7JXnUALja!n_6F!;DAUcNTAd55{kfy=eWkmW?Frb(zKu zMXGBp-Iz=u+-&g!`z{Trd~C!td1&5Oh>Gp#MZ!==qT>dkDBWC10bCn;k@aONR`%Wsde zw%41-W7Sa+qbhNZ1M1MgkRzV4Mse5QxA-V#vROBAT;Z=Pb8$R@Yj01{@x5O)JgBvK zt#yfWQ!`XjTYK?p+4Db|0ElmL?uUdoGvaVLJI?orparQ2%xtpN%{sWgP1_Q&>$B6E zK7;^Hw(WppSs;C#_VwJXnRT1l(Pmg&A5{#K?;-S}han-YoVbmtr$_qyN(K#NFOS4D4PIs$a1?NuNc)XST2m7v z&FLurXG-_~tmgj564C_mKXbbO+0udjGqu{~&Noz1S50J+0YK!hYmSo9*m3M&rwd7j zB^6l!S_So%Kx#BPuwd55!F0kKtE2#sasdG_L=r9dVg+fu9PwRLAP#kt$kXvmBABHM`hT^SmIw8Z!j-%5(+tQMe!IA_34DgkX zmswC#gB)EY#VyLVcsl?<--g*k^fs_bK%2PaW)_KAG5{b1H=RH+s?pJ@f;muiVF@64 z_YR!3V=y=7QzT06Q)Q&;o???_2hi8lN^Sr_2NF{kp{6W3^O|fXQglFQfQK&woq?o% z!Q3O0L7!I+nuBp)b5sBT7;&bW>(dH~Dj-r_c1!;zdjdg|>J>eHHe_Ih!~!M9whFMgoqFyz z+hPYl53GA#hmgJSJzp#YmN%>%=7_4NZzPbj`qS$et1PT{SR-6pN_^}9tpNt4-w-jY z7%JIZ)DB4WvlI)h)!n9^?B4#I+nZVbI>1-<#nP=pkk;V?Mmr9FP%~I*!w85kyda1nqE6=NU^Dm6Io>Y|BY^eau$pNYfe5%L)8}ADc5RPa({8K?sBB zrVjr})HLsHH&2Rp?ifdM-Bu@5zQuv$3ZDyCRf;TG8m$7rDDS++c&}Vm)asG|6JakE zG0f6v7|^IwW~;M7tHA%HkcoR*n>_#1qZz=kEm;lmq|DQhmb2fS$k>{@~!vLcY_pqHrrMb2r1S-xOkW%tz>Vwi4rztVM$4h|?A)C!85$zG zEIGqWP58$eAM=Aagm}`tHFg1DRvnh*sLv{6Li_Q9idB}(mbYPu44_(I*2OZ{!QaEm z$SI!9uZq+B&SA^)#LI^ji(Ej6Y|~@5ny`^%ib~m`Fo`v{c|yMKd`l}(ML>uJw$d=~ z*+J>9z(^uC!35&oWQT9L^nS`xvSsC7qEv4>A~)o7L0NTXcQaDz;xRAY4sfhU%FP_FC95SyNo&W97OF0P!Yx0IFWUe7ho%s9szk_}XoKz?2oHOWj_ zgpRB(wbkY%?arJ=&8ZGasb>lx5;X@b*+%TzrSqD3fNFiLLv(hjimRS=6fw*xSGe2RCD000(31#aX6a56;3C=fKN z|1$i7$f$t@rPwVF3ILQ;$Q+6vxUEZ31^{?$U647QPg#oUa>zog+y!JQI!lxzJM!to z10{lDM^9<8WKz|ECOQi1&0m@ps*6=I+?TTo?1=24=LfjGcTrd`df znoouZ3h=riYb`VIa~rY%NDjx5v@xoPQt>$7G5|6a6hbmc6y=&yLh*!UfJ{(iM%s|g zDy*1AU+7}UkU{S(v_#e-Nvv|fsrnR~Nr-MH#B_{&V~~f&@LaLoi6uEsZYi&!0t)D| z;tr#CIpJoZW9zEHu)zRFrUpQ2suGQj&f(H!g9ETWXQhnJ;8OVr=xVBaxNRV%u$0A~ zMYh4f{}I6EHp+G5Qj1QU7+2MCSSpO+f}+@T8J)P{pmA}`pQt1YaGRROL$JyudWS_H 
z|F%5Mt{zu{IY{SV1}UIeM`CGugvu)1|3qI6#h^x7y@PU4WeR*M4RR(XL zo}!7OJe&RA)zXFz?nZ>Q>fGfY>yn=lk@=So521t4-X3t0jnQ0~wUd#=6|iK_-6n$( zbT5~)`muI!*;Z1%}H2n8s$j91@qT$hzEL2c>3V~FVESAW$Eq+Pj8upuOU(;7IHo%NyWM#*dK42adH_JyX{iHz zihCC_+{e|)ppmA==tjOIk|f9;|gNM0^P@M zyD--7k3#I}YTQPeWD0XmmTE*yoX1p7oXv6FIM~XZR2;eH#R8`D=CuHdD~nkbj2-w^ zmrT*j$TF*Rf*eK}$=&3tDT;*Nq-HvWUgm{twnCn=ik(a>-Bq76NE)mR7Ud|rv-XKi zsgrAayK7Xa-g0-V)RJL}^+C`KHNLvDRDG^i)syK=GoXT^;#5K^vF9cNatn2hQrJlq zR^E~gqol~l)Ts-ldO2(P=Bdklu4-uXQq`5k{cHj&TAUeVeS45tEki?VP&bR zw2{T(GtH~vN4MnetYw-(v6bhn;ly1e)5y|Pl8G}9@W<;w$Cg#m0jb(x`0^{RHmS0h zHKV^FhmBpbu=^(Yc3@~{mV=zF+=lPHQ62v`~%`^ zgQaIF98s#PB;US6qn}m(%PdE4 zQ>6V9Cq1U}!aDySS_)gV8Qh`aw5>;?F*I2$7kjxiG)$Qp%QP^UH&QHC{vW__yEw0! zP2-%h7E(~J|bU_Mx1i$4o`5!5(9TWUzy#~5pbsl4?M zV64Wq`&L{Pw*H*~ng+&Cc)}nMs{UiB7oj!O{z|XuMhKd@ldhW_FW>5We_pdp-is4RyKU2o7(W;2Xzj{1GZW{WIJU z`6VEpLUU`zMTK_+(j=uF9VjA$XW_e$Cc_Qs$3=ok!dcn07SOJ!)pZNoHFv39v8{9leV%d zU#cy#bIYc9AKr(Tnrxt}Fuc{ng>VaJX2cxch9?TDlX02P1w&BKP;3CaH0un(hFj$yQmHI6V(&$)pg=J1G;ee&0Lf#iW~73I@FS0fh*a%g(W6ZR zGH4Jrf>M1V==hWAJ)dtT|7o%Oti#4oiTJtXfiGjbg8V>Sy~B2NJ&5x7f{D#k)xG2 z@;P0qU!DUur_tEN`*?^ z3t}xNwVq7_|QDr(^*-5_Y^aUj?a5#8ljYfx%Q1}cjin&%Hw!ls zoMm>ZB)3mdD%fW_;C!76mj^Za4$+KT(YSmu7-;A?-(=AU?S=se3hXbcgU9sWXNwC` zslf*`#qHke|6$Ps(fKbr{5RQTMiYrlonfUq3k#ec<)ql{q>kQbiSH=hF>xTFi!nE+ znuFzBwvvq9gaI(VZvCpzO8O5#N%tjx(XofYkPKBHz_OWH`NihEew45En%> zL1ty;K>O#cV1`8|u5%SmFK~F9g9C>gP|2i-(?>DL>by9i*farxO&&c#nr~09|lWhBK*wjxRRxoDQ?0DkH&3tOk|8DapYJ) z$FATaw2>Z;bh|f~o~xb|wg2F6gWW)=!~PF|(r#r3MG(B>Y#gDYZu`s~4kxb0<(
;gly%DVs#y5QXlg_c*tM(dh z$>n1sPxwhI8(Uw{=ywA(22fBzOvlqq$+?^cZ}EEGJXhF?#v7J}>%XE1)Eb7!V#J?4 zSpEZ;B9rLcC`OR|)h|WHFxCJg35Fq?BNs*))VRpM$E@Ji^-30nnr&);LY`YAb8qWh zP8N+C38XPk9x@7a+V@1igZ>QlUrR6kjViy#&3AS(RLjM6xncZhX0>2Pkx!u`o$FFA zf(to+c!$v%h*zMvzLJdw!NJg_qGl;na!3OfSJe6MY~tZY@OJM&FvGCycS%A+osMhz zvVVZ)-^Or(TLSSv&RKr{fa*WYIifSS*=Nr-w3|xyik(N&eM`pogPZJ$s$g%vDTqN_ zPWRAGZ&j#?E-f&Pu@)AEnUccuxEQ8k42k|&h0G{mv;wWMHd#IJf{2|3rp;SiFk{r# zBkPUBk>g{bREjT-+KSeI*l+xj{zzl;)yY{*4~uVwI3FCNtEs*k7@|JXmVYD2+zp0z z?3NNSm~&Bap-toHOrNbLGRA=FAb|`JH&_(D2{kTWOoS|M6IfW{3wM5g`N_dyzIf}( zV;`SIwdg^*@#D~f09kp{KD(k}od>->=IUfYHowNZ8F}~wU#briH5}d{ZW$8Da!G;u)(@=O;i6KrS;lyz!N)e3BX zI$GOuX${9?wlN!HG*RP%<2r6c>1Eq>a@T$iaedR`p-(TTc*X8ZU5@O#e?V14!r%PA z(Kkb%Y2RO6Uq9vV>K$n*>&4UBBT~xzq@ke1IPA8FKPmZ_)IAX+zXQ2W4hy8& z85=%7+Ma6m%H+ezk7%y@ZvQAsTWgiN@Ly+_{vBE_J~-G5)KX0?;`7DrtKd@L9D$dV z{>~@Zd2h4V#On^f*le@$+7%hst)p;!IgF|CY8Wa22aAy0p1HCEtyHc5y=aVXl?2 zk!5QBI}}{%A+Nm|%uAX!Ve9u^Bk-RW>%2B2zcDTeUblJCI}hJ7HKY!wu~YM2-fFO9 z*~h1$@X}^tq|#xuj0WP_M1z^k2fbA79EF|d5)xvLdYE4b^qgV$!5Wm#V_q1;UM5a{ zoSc~%{q_n)Be7`xDSTR(B>dtNPwc&&nMwqh_=Vh;ZSQA}K13{Y6K z1*w^>oUvYw%V<;PdPbnLsTH_Yn1LZ091YswKg`FS`DpO}=3mq>s#>!IIm#du_~6K>iPu_C&@SO04bxDaM#ohSC2r0) z)l`D6Sr*h7v<3}fZ$PNFO+*jAqm47=Z^lN)=BBv{Ue@mlO zh=ka%UW;yufBX9V-Rx6lnvYuLuFFU@>Ocj74h?wqYMuV>i^{g2?vO zrBZMPfcZ!oCTO{I$Yrv_W3<+sOxRHOD&pCBOF5wyedBi5eX$jt#&7j{eZHyX1ckz2 zZ_S1jW?~1m=m|HO>v%DX@;mk$NMSMKL^ko1uL|?=X&Rp9*(6da{vy^3+XiEKfdRQF zD_l;W3T91Q@rs5hRN9EQHZ!sD?@;pX6*5swv~um=Kj#5Rvc{t>f<9C1b5a2Dg!Xb^ zOszQVOf%|AG%0~^Uu{sCt7JPcV%cgFjTLkji0+gqbt@KDj%U}~jxe)hFf^tHc|0tN zA+YEihVNlu!5{@voKFn+vLI=u)b_n7t~v`mo^vHUGE_T;v%yI;z#q`u7 zjQn+|xk6D}!SgUHEOS)(fQ~9w9f%xzKJP#l{@9h90*i+{4QgPOKpGg<_TEts7k2ik z)vOBaXk)wZITjoFJqHiZw2Wv$#zi&(;^BQ?3v^3D14a^zR4NvEcCW|FhD1ddY}j+7 zV`u6Y(%8a|;}eL90_wD!_fWS^K?OQ@C3Ge{6x?u(_kdZuEW`!!+A8*yV`65p9D3@{ zcd7zSXj$?Wyc76!R&V9v46%Vz!^|Ip!g8H9=|}tr#Nt2rM17N44froPgS~|`NdQeS zBVA~QKPxRtSgM4xZRtzI&I^8fdR@6C~`&;%n5If=riAZC5V<%mQJ8hp7`~$H)P#MI6#XJ*!LpSMKyKr 
z0Hj{{2co;pLbypc7N5!CrL8lhIT|IZyDB*8)|hTjuQsF)gk~7*hj~=G8p^>Ml7Scl zWP`scY1yoze;Y(o`C({81RchYPlf^}btrGJe-Ix<9y~*8rSbodrSFbr`}_Y-NQej$ zGj{CQi5a6rY*jO6mDt*qnJe`03xln>+tBqK6F;P18h$%ZoO zK-^eqYM8M6V1ZZ8D95|ZH$gj3CgGXQe6w{{vQy0AgI&!YnKVd6v=Gq06bww|V{=mZ z_IcFgh60Uf>I8u3^WjdWWdoqtHIJ_YBW39j7Gzn%XszB~fTH-AIP!Gm@Gk&wyQCv^ zep59Y%-GW7D-WN)DwKxb(_oCPot+nWsRw6*o^n$s&r@rM`)bopzcac{zfZ)gf&DjTVx3HvEbz|CQjTpZ=;~7!3Jc(rFNJHj$|Ssm7G_e+b%(ba z?n&Cv?*|7eUTdEU+kYXS`6);xr9|YYA39Pa{6|XF*2-J#bkj`*oz|hu_uc2_&N1ws zh1?51Nk$0_BI%anVG0-oxdrb?4s%ZwMccG)lj(FiF94AImhg2(cMY}$Uz^^7=FO#z zh2xeIvW}nlZiJt;Y?jBK#_r?93qn&>E06Dsry9`21J_Rbj}tT<7_}#XFE?B^%M$pP zJ5O`B;?;jRwQP8(4y${8WE>p$1|VMW6wKPa>!We7h}KBZEV&Is*@1HcHA;d+&t`#58FQ*{=LLvTX1-=n~>`Fd>VUZ z_>*rjLGr{aP43-qox{Cmjq&*IIIphKL#_#fcdxk@T;#NQEHwH%;pDJ)IJBBQd zDImnWb1M=cjA3lHHn7AW-Oq1Bc|99)R za$;;@HkBG!fkPZou@b1N^5cXz@75DI2BcfZj<;4$+kP5cJ-xiK9~$6)TDWXfV5Rz={(ws(`oSj=tb9n%RTH+tyyydUUyZI$6Q*xDRi`NMns~FM?fvFR z3BNDEyvkgTlOD_9qQVV_Pp3EoY;LZ(C9sYi+i(6+Z5VhlaN7RoTj2UqjrQbkjuYGY zU;1A3_(z^DzYO2Tr~k@5Vx4>OQl$U>C7>yLNa;<7v+`v*bl-Xo?}R!hdh=z0WM1Tu z2E3ZsDE-&&4gEMQCkxUwe_2_KSQL+RfV-&61}FEJJUc zsjWt-_Ezkk@7YN0|E7bKtXd`LRHa2FnTvAHIoTGl2hX4M>y-GU3Cuyc!^Erl^-KK< zV6NUQ@OpbifTV`9ovJenE(^(Tv1gq~yf=j3CaD+lOK%u*gja|pS7k7S9w1Lp>6eme z&+vQo03&!d(i>jkHt$L2o=tDMltDs23Qz=QH3+s?Jx#pz1NR@CEnn>PPZuM2I`1W| ztl95mQ77j38{ievDz7mKGM~GU$!f+93i8&x28-~&_2~=UV#k{ajTI@bHJ4*ke{@41 zwmIE_Ve-0ea9#<56~J~2MUg4iZl+&n(39~ekEKg?8~L;vuPy;h z>^l3Xi~%L_@VcA&sob9RF{-~V zqWu0Z!1eMjAPC!M;V5xVYb9} zGgwn0(pNyD+UQAQu(8S$j5_kFh4N)ZJEgLIWF;a z!4UH%d9E1bDno!Za8lt$bs+eVcbc(N-Ex6DzuN1LdlCWuCHcPUyrPtIqATOG^*g6? 
zE2qmB`PY2jYX_jJnAkL3K9BTl#><(Jf%R4zbjY7zIh#_}Zd(8Ro~09^HI?o415P@- z8sCl-HdXJAn9IvG1M8Ep1|HyJNu_1Urzp5M$lg_gg-LR7m5Tb@19}QN4@{{`<=~Po z@J_`@LNuU5E(Bo6c2kj(kP6K=X1-M1rs=u&)W_d0@!3j75!da%c6vfE$>~3=u%`$WlMnr>!W4B z4a!?{93rq1W>5Dj64^(Uvxn%k4wnbaGN7B|3%rS(qTH_3PQ zp9ucwgat^zuT5?;2G?~121hno;PS9l_pun_=W+ZRl#+k|>3$fyU-v*# z!7!N9m={S$m4`Rld3C0DeH^k6c}={>eLrKR_9_HyQTl#TWBK54c`7$+{BE~}Kg6z# z|2{HRiuI{m{u6#5XT_o>Rpz|uK*hp150Pi~XzIXhZk=Lck|4e;6=+9pZ!tq^KroLo zFojC7zS|wsvKrD|YRh|FB#Gi-M5Rc81rxR0#Py1gDX423UWm4-lX;@0ubNH$U0Ch`Hb75^Rxk(>wwOjqqTTXW zS-iPXUxQMRYIF*8cTu(e9%uv>J2TiAC8^fVRhh#jP+1(SA0js=8wFgth&Bo-0w`It z;}y@Ix(doUT%$7%p5=;XC37iOoM6Ka{9_|uTk<^&7WoV zWfrKZ`NCy3yTYa{Zk3#L36k` zefZ7dJTX3&kIJ0LXA&Aw<*6;|!~F%A>YV1V*l>#9Pc8~WeKY^uIUOnCf@U$;{^07z z`Z@4d+rZ%AVYrT=F_JM9SC4C!>Bj1vocbXxebEE=&?@a+A^$OZS2(va=LVW_Lr2I< zYGp|8v`gn3_mrvbsfCmlZrC(KZNoLMyOqh{qA6+L4%&PRc`Jo2Urk$^lgyE;kT*F! zZoJMY-BJnBel;>RH88br7_Zvhe;~{@G}-$a6zq+RRQOz>JvVW*7#4Rf$9u7Uho?0A z5a#2q&i}PkLT_cCGC)SU1FfxhE|rle489O9=o>s-+YC87$j{V>kuuZJRqv?>Bs*xd#%9(9Uk~Nxx8n*f4@=Z*F4E&= z^Xl>dG}#2a%v)E+YAa?v{hEaf8J~7C<~Pd9yBL>aK4>VqR}=IrZMT;G+~dyuKXJ9h zKN8h3md78(M|n2J0(|o}CfK5}2L|CEu|9)IK2}Wms-|_=s?IpqN+hM$jDsZ!XvcV> zMqAKko1;bo4UCeB0HBkV->}rcZ3)M`)tsp&!SF`>4B3=blGj`iOjKE%eWWE3(P+Kc z6^wV3Rr*qdi6l+sUuN&bSJf_w%OT<;PHj&$kA0V0rWomd6NOsNn_9QAj6hd*<4RTR zK`bMglRLCt^$YUX_6)=1X-Sfj%egzp)Flgk@wv%47vqbR&>giI9me4Khzc7cg$NBv z!bnkiSAYj)%_S0;#~+jRyHeB6JZCsc`M0cFA(BT{VgG8mj(qLUvp1O&>}ia=?hTKo zzkqpJZR>Nkw2z}azvOV!e0&i(^u1PFzWdkOzCGJ;S=W(KJttXd`r9Quud|q$hqMy` zrb`nrj`YU#bkXNZyGiG)Ssg#*T-OL_?^Rn(>?-w#EgZ#q2`#xw+#ILaOm;~GCPFu| znTq|rOGWz2#t(f}44e%d!V`Sp12;jYrsLrq|18BOJXS>qh9yhsZA1re<(F2RHYdcz zqY&6tMiAfSV5Tv`FdU!7?CLjei00doGm;Ht1?>jh+%BZrn5je?_#D_oO^W!B zZu|w1d?v%z|MUj#>hP4EF4da$T`H=*n#IYNq~LAGgoY0ByLxA%?_T^cg%D{}TUp?Z z@+*Yt*3~sU`QqflLJ=P@f$#%a(lj`MWwy&L;8rO36~Yc!_#X|4lX?Ne)lHmkrU+R%o@O1z(pnumK3k)5IW@B3IXE z7&kK+iwM7UdBXIpbxgULKDJ?b!)HQNSkMu4KKS+L$9&kmAuErVy%h&T*Qij{u0&U- z?ID!K9iD}Pd9rZO&6|Gb1V7tCDM(1j-Ux5Ohi6~6UU=Xggu-o_;jJLaQmw9+MU)<> 
z7{S%)^WH>tAwL;a+(aS+RyH}iIO=fstUWF`4p2;N%itwC?O^Fc=Ut+}t0sgGJ*-ze zFqI|llh8U6%d?hwd-kna2*j81MWFUN4Su9fu&*R-)Bfk2J{E|Ja z*M%`dmTIeEgd}$E>0e@m!3S)+NZvQqi9oVR5`fOsnxxtw;MR43NpCE|a$?H|pIYQS z)zZM2dc@2M1Z<0Js1kd@{ zMkD1Mj2mxv=~TAUt<|y&+}nA5%L=9bxv7GVlxrI3@ktYUl9J=QAAbRrr_a~)jYt0i z3{Up=E3;Efo=Oh{;p_11PvP33bxwGt@JD77{z)5S4m;ndz3;KUJiu=vNz}(aTyn-{ z+}@c&OzWD3PZCXlgp^Evh`znB+THxoI9m{`Yq`28sYUT4pGV?=93uGN-od8Qa4(6N z;Rnrl5)()yaCqWv+dKdzsj}VCRVqmMcQFVBZLJH8Ur#{}Q3JW_y>S*wqx`B31;2Pf zLB=W{%76dwS)I2;cWR=Ekoa(!Neml#5+2*c2C_pXJAwYS=dgsnd0x%TuNdV=!K~Lv@GSvQFaNaStu_W z00fDta1_^d_t(geZm`L-ODRnb`?x6d=6^zl=eKZFg-=Y3MynzTiglj+oB;ue7Fa2) zLnfzGF714wq4s&1qq(r4=$8N zSM5XM)z?gKq4zEPs|%wwE-_d#Wc3?VmbK2ckP7DzA0o3eD8%g4trH{4a}n8SnZG~k z&CWx`DyOK4DNU)bOvliuI#oqhawiDYyC+;Jzk< zSz@U6dck+?n3kRGFo@Sr0Tmxf8i*$tu;ZL`l<3S+_NvZGkqCPFXphqx^xFWq%Te;i z4oF11b`h%#XLgyoyU{K&5HrRr*WHH# zzP@*@8;Sy(+DS15S4vJdn0@&$`)lWZ! zhhqM;Z%@BVjum7#z`Ps~d0h3#L*JURWh{hdI*T?7@siRO=Y(OFahT;vGnj|}Fs>o- zEP#<;qbiE~I_Al#qJ^i=FO;f~j434Kf^mHc6JMs?6E+f`6ZK!~9j32UyUdEMaBj=u zn@R(gheRX3hGihY$qCA53Iqv@g+xmS6-1kW-xZFb=PcEEEpznRg2<1jt4b^bqf{k*pt%V`5Bv0sif4KeFBhK~0){b#f?Vqe5Pb+I1rszW z>(R-T0@B@D^JBaowvTbP|nM5W?{bQ4;j?MyfwG)xqWXJ@A2P(b%kVAb+ zSz>k3BkJnv>FUBwfp$P(XIwIxV{rk%rNEgUZxE~`+1*vPk|pF@r8Q&eJu!5Ced-5$ zZF!iB-o2};n0u7)v1)765UVEArv^)}GqjhPv0Yq}^qmno8x$V^8Ns@B%StOW~scvs}g{8YLG;!nu$QKQa zKyV{+^=mwzzY*=9ad>$=P)JC)ada(8(NJ7fGzGj9!2Kd+uU88etWw<>7Tmc%HzuBQR34l{k_tE-WsHp(-TaI{IP)+IDRDvK{t_ zb?v!e*CQTklDmfV=Q;-&tMh#4-R-H_A4XKyLHb;=6KBqwG9%UNLg!Kkc(7~77hGawTEeQ$HCF)X{SsR29O=*Lr4X}HUIyM#p=N4v} z*g`M_F1UP+vwjT8l+mC=O}bMc3|_#8 zg9uNsn%^DVq5bgYtg4anmF!R&U8se@8YM&tXAe!svZVa};6l#_;YOV+*7*1&_BTdK zIpYMox@sj>X<MImL*OQQ$%9iZO0%dJMTe! 
z3A4$oJuuxM{9@u`A0CyR9~u(KwTxm=U0ugR06Ck;N^nVjQp|hp91lo68kt%x7BZQ_ z`VVh(iU{WLnYw1Vo+FacR)ou0I3#i^%K+9J!I=$s0SPvZ1ZTs20r!@-jabT%a~GSe z(nbnyk_NObVif|#k-(dmg)yQo2T}tV4(7XE4X2_yA}*#3sy@h%D@*dZIL>zr+5jI; zZ>1aVpr{$e>Z>oZB#VOf?y94P!_GRItbGO^G86!;?vw&w(-5;O;CqG3yFo@Uj4e%Z zZK!kVyk-!ynPwSWnEk$>PcJZ=QwU!sqg6 zgS)@dwc?1%#p*s|eAZ*>D?>>l*Av6)ea5ncd?5VKzo4jhtJQt{rZLZg`(!abtl3QY zMAJfaHgjl!wX~4%zoL0Yj*7kmRb9YM|L&E)fOA2(Ashf?W@Z92f&TAf2A>PU&%^JT z;Swk;h#O;MkM{;k+J*5*sU_wU>Uh;1NXCvtpRR(zITk+Y@Z|q56=yaCbap+9%5}aa z)f~Y1^W*~R$H_Lupd_#{*np;4S=&-um+P!id%u>bIq}2yHD}^yVVi~@ujVSV-HvcS zAzmA#YKl8nR%Q$K<^F8_%+;91 zVZ`ChNpbz}f>B&#YFFbOJw=1oKg6Tl*_TaX+aZxogIkv}IO3ZzMewR-CI{ld>1v?Jy<%idn#)Oa!QnX_v?l8bnO z^scrEr0eVTsC+i8!64@r^B1Wd^ConxG3`dSLrRzT8=-)hzFO8=_)R7N2<`=9#YjoEZ6&kUH(ioX*x(LulD> zIB1I{_S54XxsGIOw(r1Qqo88stBP7V<*%b^GIqo$w=g`ckpmS7Cn)`{*v|c8=bKJ( z^aoGwElze;(HT$LId^EDPU=tUzkN=7Zhh-K2}r7O;geVvjtHir=|M$l)F$xXE z#vgEBu)eb~R66xr4Uf%E;cwa<_>q*|kANGZ-m|(RIpx{r^MJCMpRGUEX!ZZTBGfZA z^J!jkB);O4v2--zU7^vl7wA;$NGso;x)F1t>f7Mu&tl{S4%t;DU0*XUldE!`x~#6y zN~W+po~1}$KCPm`?nrN^@b(Q?`X=UXjFXd-o3m5D3MyKm^MIaW=U$oPc-|-Qp}#T@ zlr=!rh2LUrj;{gdJeY+@%xl%!vQcf-glfJNkcsaRyYxPn)@b=}6V`$5q?>aE7lN!5!^|M=TytVLO;OK2p6aY3ywGNq04LfSR*dCQyg2k1webec0rp$G>9yz!8=t2aC;WriNPA*JdxkvmD=7D^!j;2~{dkr{y{u;OipNlAL)rE~wLr1!_8v z)X4lVwv1+Vfsh}@_|M_lbK}r|T_mRtm8fEBHcsTb|ng;7(FrcGQ#4lnd7)6s51S-=$a`L3>t|q~tJbGBAvSy3b;vcTSCktw*mJ7ulKoy5PhMFEDe{#!YjLqgJuHYbq%Go4|z&#%^CYm>BcvJ4N=dLlCnex@6F2 za>gT~ql&}25OXFcZkB_lbRA{WW=EARGJ89c^W?sv6nk!G@yN>)O~a(#$wJ;MzgNx` zEeFqgJ$zGRg?Fu1pTE20aFwY=;L?3zO3byTl%;uT;bW7BPS@A5vyDoV>#a!Qpt`oiAON4rrRy|LAsS2lmWmY#wIz8u>?&HK-t_Ql zSBm|C&=_)0Cd}}_>y&S#AK91?r=o_v_&a3;7g+o}y!ZamD`I74m7~zNmC#Q|R4*tp z>Sn<+xZ&Erak2^HtH!yy|!!tqWFJQtp-g680`dd&;Qz zal(>{^aC04u}qlwj-QH%_mZQ~?G#4BTWp--S;V^`quPEU~@(;*(Lg8J(A7w{|dcJ<+fv2R3C5tutd6Ul(}e7vNq|#_;L5^wwP zd^Ql_y#SbXd_O;yF1h$xcg(1-3oYRrT%VJqAw2GC-*E%7aFt(o< zb7yYQpW$7miOx=L50+|NUrjq+pH0vUA4Ij&uC^p()GQ?iT1(AJj)>*YRr~YWLriBL 
z1mU-VwsjYHW;d=XRzkoYF5AL(awR%Ej+o8EdkM^6-Z8&-8R)%KYb)jf#-?z|>^F+IusOjI#uX z#tCr$1?=;(j}yJzipqydl6++@8o7Fya}66*Ue!Y0t{dKKzl)ew5*~S#kXLR@_FcYs zU?(9akgXC#UjHVXy&jh8~K^@YF`Ng@nuebpXwXfT+ zm_FD7A3N<}f@Qqt)Aih$#uXt*`A<2sbMlmet<(;r7V=`zd|Ud7mBg%_#tOevg@tj6u{ z;xq6-LE`O~hed>FmRVUetI6Dzv6+IQX%-B1( za>8X^0OKyEhe~whhWDrU~K$<0bIisRnHY?|ACLigjc`} zrr1kYHSH4^g1ecV{sOjk_@zl%_M9(A^=@{z(e$PUC@wSb?9B_y6!3v+k-NkPbZf{h(p8}sxh5V_=g)I6 zo$C@OMP2LRDTi(0>@p&%(~twzYWy>vL{1jSCC#f&h{>O@<{BV{oyT&2xy5mnp8Q-* zMzhmJc-W5jBAK-NS_fb(;XpSOYt3K8)&t0`p+%z3&4Y$DftU$PD zoOS#Cyz9}({Aw*A(v5T!RcVyH*)n8o&D?Lg z*Pqee=H>kti*ed%$4&?@s;|c!h$rtEVEQj0v$j(v)FXkP!H8HERDt)k?Y&_u)vZ7e z7=yLPy(lm9*bW>&o(1|j{%WEJ+XM{s6_CG^-jV z5k>y=YbnZJx<&q=H<0Hm{dzOY2hEyesj)Aw*F<<0-kFpb#!021CY;0%9yU(cGz~oa z_hxupZ_(JEet}H$r?nWLk$uCF9H*gEfz*iOiU9#xXJ1c-vL4C^5_;-egBe?w{0m5# z)4Co*m!|T!wt1B=TOedYsee37vHuJ8;!I#HjfTM@wt&sV`{{e3?;U>TZmSXMH*-f< z6gB>rdI;Y8MH}OC`Z(j4un}jGG{cj4L*nc)fAK!_`|U3VUW+dkp8s0NR@YR-KI6NV z^K!(~Jm9bcB6x44?=Qf|>ewR3eTIDnre?gu{fsAfJpA}JW=1IPQ1(rWO<5yy)}J#N zWO3OF0XF7$S#OK6oBMfbt4;o!$*W@Sx@|E;g;`Y;m+LjQE)rQbiZfk6_XT<0-qhS# z;bFY9NYjRP-sR29=-bW`@jfCYLzpD4B@W7e)U&Ruh!|RXXUbiXm_)V$kzEI0P8z8>oMwu6UnSLh2ulJ!* z!HM~@S1Nt%WMgQVTz+$}w>}mZ!Y}u02Z#TYwANdZazualO(IO63nowGoiZCMC+-GO zVun7Awa?ghlFZYZjx}N?v;RqVYSH(<`P*8B`<`X%`<&2kt5(b9oc>IfqgE?3T+oqA z+2A7Aq27Yfw<nGyWH*$+3R{IPU66qVK$r@rd^P)i*Q609v|RH zVY3kie=XKS$^?Gk=1Mm)gE)%q*zw}2Z85e81ExP-Q9w8Y&EJ2&nrBFBt^HQmTdi>E z_Jt^x@=l2Bf|QmP1eHSGiH{&UQWChiikO9-lbau`UGC2nr{mTIz2eh3=?^(Ax`S;Hk<~W*J&Sz*x$s(U;hgzb{Z1f;#sf?72Bq0 z7achEbyl%JG#*NzNWsqTHSQ=TvgKy`UWn6Xyp_wxqgD zk6D_Md5UI2q=mtFcx@k^TEq@(e%Pp54II*BQiA_THp;cR0Fu|@FARmx-ZEuB^9|-8 z!8wLbpqVd+Dip*J8bcD3?y2_ZVz$mC)~jgPP!&~@K<2lXbm!efphXDWh)p$n)(Nxl zO2P3FZep6OCm3_F|Q? z6h}!CXqmSi4vk;N8zUhTd5D5VH;Li>wNr3Lp_&vFdrUr74?1eJlquLMHA0sg#x5P|+G1tG1706Q-+OBYLpQAobwi3S9? 
zh2jLL9a&anS>xjy$$PNd2?R2Y=20a!cotk~?354;@pn>2T=*vZIv`!<;t;IS$FvVQ zw{DeNI%u6Nr6~Oonh6vCxvdaYDLg`zdZo+K74svsdA503-htfn83NWv+E|!23>$f|@{Oya z%y;tFad2LvO+hes^n^{sMZ2pXj2p3n1#%3k}-nR>v4eZ-~+odr@#O z6QccD&L&T;-=Bml1X}_Zl4|(zQoB?g zx-jIn5uunHD=jCKee+wg{L|eG8Ggrz29*IbM0m&z*-vG6QuANy%5~h4f5gk4Lb2>w z0$mAu1r#)?Yd@D#o*KxXu@$}T)(Nk)pScQ<$Y7g+B++bUOdrZk0%uJqVsa(!)mHa0 z)*6FQb#cEf_5S9=02Y2W&`_{tLDj_~$w0U>&l2lL46iOvddaW^?lQL&dP47fk1>4~ zeQRF6C@2TdT}kro2iWPsEC2gnUYe}dK^-PPnSz|)5Hv$#T*_rEa{tVVkNH%}-WDmc zxEevF8cjy#N&2EF?w`t=dW($1Ex`g1Nv=TNGar~q?0_4XMmY%g)4 z;D+;#clB>uUH6GD=1>9+X)2KWLiGTMxZ3wFB-*wJrIND*-s2^6DM^eRwPy4-&{|$w z&ax*+3lfI-&w=P~rn@PRb#&Uvsw=PgD{$~G=bxKwA9d^5aqmi;2%EdDz3dp6|*B>hfMr# z@vSgoA$Z4+I`%~m+k|F>qw;%M6$02OBFqhsZp)FSN~8n{u#<5x{)ANamAyjRL{_(5 z*JMw*4vt#g zZ=D=@Km%lGQ7$dJ7E_qCp>P4&#W(sm1CI3h*g-<<`z+C<k@^dP#@LfRd_#ys-zmoirXXvI$#IUR=rIi>zdO-Zb`M?A}HBW zILqEZ+<6zOi$GS=t%RYut3{V6qCnwG)h?-_ke+e@E$$1W!4^r?mfI_hb@nYxI=;|* za)$sDzT=Z5&kPP}Ne>~`y{apyqPJ7DBs*-PFy2Is^_ycfH|s%0`E=U`Vw^?BIQ07l zU{P;okCqX>G$a*Nz!KnZS(aDku2U61Aji_H!8g9T4f>zyPy)|SIS30Q_h-?}czLAc zXmxieWHL0U@SI_4@%4dC-cjWYNb3bTU23xO0 zEUR&))(M76j0*%Z>-k&$$KE4$$mD#O2(5+&kjfIjReSanp}WmwjAM<=8fdZ_&|n-rsP_^~2Dn%r_W?m08ph8?P+?v{A*Fh}@u9i&XiGxss)D=!=#JYHLJtB`AYC zQDm|j6WE0l4jgJ5eqA+iseVv7_ynz0i>Ca_O)PNiC2a<8!G2S8nvgfiJQZy0V6*e| zD$y6q3nlefeudy$au#NzN;hiSW-aaKAV1h-H*Wwr^1}i0vq}N$tzAav4=0;QLi3c5 zi?~WQ_Zr!*4tac1a6^|mSHyP?|H(Q%Onb= z1-}-3`Uroip^bz$b&bao+E>+;IsS_jT2&)3Z-kzE$?QHxATUtn=|MY`UyC59;d!Jr zgMGjjOGUk7n*ru*^G`K9-sIQ-eAgqyAUEW6DBi>OrKP6GRh!c~FAiv&o^F(nv1ih9 zJ!Y517TI#Rr1K44Jx-#n>M_a1x~YMpd3q#`z@1hlPki;Z@$R7)sZn-d7tzgUf$4}BGJg#nwHS#s8wanqtUjTn6Bna_v;y=$w zQNg|=CPtlv`0=x_dd zA{!dX1<&nCVWL}Vc6ku$41wd1Do`LGz`V%fA9UDHo1`&h^`Icztfex?u8e^(P_@89 zv@mQE6O9_qtDY8|%B$p2$&0fJl(Gtn4!7Y7GP*yEWBw!&FM-me!?c4I$|2+c*oqG* zA5kUw7Qu93#-2m)?07QbV)D(a`|M>ewLf5=4*vyw>#G!GTPDE*NM=0!YN$Spc0rBh z0o}Qkz1gaPAk!R?gr&rl+qT9Pz325fV6L(Lrw3*1=YO}NwYtwA{P?3Li^HZ~9|V8X 
zq}@}m9^AaaC&V6T7L54e+j04>ZX=CJy0y=g<@}~c5hjb^H>&LI>NtSlFFmH|y_IjX+@9Fo?YscJDv&09kQ8UUD+b2217MtT=g05q=U2FZtr z#0o?BzsZKeg;{16*|$)zaVPdFIqwIBQdJv7Nso^Mgq!kkW1DpeaN#suWPl`VR92X0 z#v?L7;EMiT4*-$_rOO>YE6d7Nz?9CCu*)0##)&69C(VIy9YT}Qdhd9&g@-L~?B&$o zlKwB>d)v3*6X7;z0FO-;-#(}7y23IJjWf;%ePjAl6>+Mi2C|Dh%S_rY&k%Lq5Y_G7tgl#fOC8gVL7`ZxW4 z*eQzXo*Sd;yWwK!Yx&M6(*GzFV}2&_)EeykOA-lvJ?Wp_JPQh(rdf+G0szpAu24Jk%$%GV*O~Ahq5)Ia7}RmY6Vpc4*uwWR0<3Wi z@kN%0!`z<*e-|ALncOal?OeQ&zM%4w_~Fr^c;uS6;^nDDV{1}!)%`NDAPMJuST4^q zm(AeWgzoEM>Xfkzm4Y&Bri93Wx9LUS!po;n<3&Gnz7+`#*{~hN0%h81?3nhvc!YYo*3Sd`Vo+~ z+0WvlQObwOt7tH%(bp~EY#rmG=@ow_s)E+b~{GERnAHfKyJonDI{XqRFAIg;QfiY(}1dsFiURO%g>6z-M@f2I|bT zYm*@<);%nFer!oRZ4XbsN(G^(IJZWBH$Q6BNn^6!g4SHj{kp*|jYt%R*Gl~Ygh)3h z{{=i7!RdM=qtd+93zwqF{(x{93!L6EU1{Sw9r*uBx)y&X+y6haF=HBL&e|L|rxtRq zX6CepVT4r9QIf-Qh-zk-!x%=yY7QAvA&HWQ@h${z9UZ1kWOEdRnBm{>BTb7nj zS8_d7%_dc4OF}GP$hDtFoBv>|P|WdOG(b*ZhXt<)-?&bPylF`G;>F{-Jj@a?Xnu%H zwKz0%n|QE58RKFrle0ldf!T7WwsWY4lgZ74TY%8kQmWatoMm~Zq(BeZub)oS{+T74 zNO0T1q*~n&c~W;9svfwV3Y(7&g}dmc8N|NIjLoV}dS9#`wrvHaRmb(?!^At3 z1`NZLgbQvIgF^==eBe_HiR+!J_P1g-!)_nV?$FIr!kVr+4yv^+7^)u_IMCJnLC3>n zlxLxdp9BifQ=TSt9M!yZO#5jV*>%Jj6V_ZqJ9P8C-t&4@IbC?Z-^6w$C!OHL`lJ!E z6dv*ck2$Tkym20Z?{Z^O*gzzr5>MpU@h9v1Zna`wSzxEi&q@s&t?@*6&=$C(ST`-G z`5Wy}L6`!dPjsiWc7(h~MYBSJdQjPMaRP1a9j* z+7a;QD6#1GZJP=^w#n1*{%0lCju;dCKCnd6;r! 
zpqNTT=q^fYegZvxs%L8gyS@L=gaIqb?6Sw7qte>k;??28^-g4`)X46Z#LXV8>a@!; zTf_f7AIp>{pJkPj&09HQx?TXz7LGORW2Rg>Fk>Yfe|!5<=wl_YvvtFIwEM97fm?DP z#OzwKZOw|)z<&kFj=EH3ttp7Nt(N_;MQG~*vyFr(Em}|Q&f)kQ06|c`7L~T= z42fPp^t^Isx)NlgXM;P-LxEf=$Y@=OdkNM`iehf_pbxqTpjiZ}+FXVyJD|+n#$XG< zx83W~+Sh2keg2eVi%eJUdZg6AX6)jWENb)s1avASjsLP2ExgRQYBs(Xv(_Zl0EvsL zpk6mNE*cTV90jaIS7_^?_9B`z1cnPRP?n2yL7hv2dM78r)_}KpEri(kBupP*>fNgq zF1KGR$!8+1sUIKcz%*AmDc=C{rkNZZlZ}IcDUnz2h5eLfD4&x}wKu3Fn)geg_xIWg zKPC^ySh$(&YwVar+*`DeqP%XI-hxzKTR}wwhVg5j_y8jfRLwQSkGogB)u(25 zXHexiZ^j(@>{{)*Db%iaX)3H?-^46L5}Dt=WX5?v))?BQx<+0)yuUIqGU))V zk5*;>0Z>fSnz}c_&0y)9i$&a@DOPIDHit&|Bl1XBR*zf@7d*h=RI5A_J5q)y zg*ojWo(QuPM1@0`kgBfugy?}Z^$ zU5Y7pJ@kEZLRGQ0)cXYMy7pnoe*GyGt3>BykJK=@Gi?SQ8up_{EQ=fH5DOt&6^n3P z$M1`Fq{ray(94*2r3Y^p`-zk%Hk!nNsH)6;Nn;_lf=x1STTQ4!^frrRR6PXRzDUBA)+!g{Q?*wp&!S`HEPlE=aD&NZchURnUsLpJ$m<5Y=! zmLp6@n_Lj2{`QMOMdW$yL(Xx*ag&wVEvb9k=|U~vW) zv?vF_;;|l;!PbDbHtG-0kaXgic=axfJ1IfK7+2t$v94sR{_=45X1!(EUNro}D53Am zVrsp5`eB-V5ks(*a@A?T`FodaRl8boKiav_o2=mLC(-5rhaCN6 zw`618x#;p)n6)i|rE9)~0F8{!SwVj2T75Iy_XpwEMHu{)IOS^=>26`lX6ItZXbeJl zu$Jmq2s@?AxzWb;o5aox8d=H1SF2$u#^MGfQ+g{NtViO+<_tRu3@TphDLxv*LfWwq zKa{72(mlCOx3}@>G)8s5MNM3+vJO)v|9AaJd}IpCa9K3As4r$5Tc{y4S=SxzrsLy) z2;7G8A6og31 zKB;Z7a~9@xRu_Og3nhtxN9C-*^7PI`AOKYb%v#jzB=ZUARBUT!>t6eW0^s^LRaDUK z&@Bv!@_b`u6sDC3$tg}hF-q7Ki#~OMv+sWHjVaEeUqJWS8$bu?B!JNzNQ6MU;;}*) zQWsy*si)=b*h3R>hnZ3p!c-!V#1lr4nUmDluTixxr!V!HJMS;P`dUmiG^mQVB=$x@ zR3pcJHY+DWFM&K`Cyi(@Eum( zf{EcHgTaVcByuwvoGva^Kl^?9`d#SZmrJgd=1~~Z4=1Gu_1W~Rk4l~wtfWlZ`PDXj zhRWqZxekpF0`)5zM6^PphPW?MsVB$(lbR$bVCn5rIOG^tmyA-)7mY5#b8M?Ifb@7t zsIA@vMU0fLD~54i_1NbG_isf)X_Xo#8->2rvp3>o|FC=@PkO1xx=VLgmev3X5KfDF z1*^5e(d(JKAX`yMTgonkGN~v={DmQRpkbfWY>QW}joXm(8CFYcOC?O(Ddzm{1Gx2{ zP);{gDi{UTP`Lo-K{-v}UYaYND#NEfg!F16gBnijeqMAMoQ#f!w>k~EkPkBIQw?@$ z?UUPDdpQ{?d5sjWY4fLdxzCk! 
z)es-6@T@V zDJfmm(xXZ{??@NRF-u}erBkW_Yb(UCzd04R)x5(v7kyr$seTOLq5zxF}`WXaRu`Q0~3{3&aTLQR>stL9xQp@T?4PgDaDUK248;BMJ#?)17yDT z#PFffUqY7@7YF7g3oin`yDt3>2xwyu*2Wgps!HO=YD}~i5F)^hQ5T45xz|~kKiCbQ zjk1t6!_|Thw2{T3j-#k!BlseSDN|tm97%J5dOE80hf57pUH%1oulXLPxK|Mxo|QH6 zMA!cm^Av`C{0^giE>AW6gbbdTCLh>b6$+~4>3`BOSB$FDL}001ap%EGD`A6TlCn?k zP3#I}1Y3pN% z7rKfDKuKIqJit$$ua$STSK;c2pTW(hBSmMu*KY7LE5jNsb)WDW`DD~WEF}U$HqkF7 z*BZpNb|_(I5$CG3t~H&~r8;+Yv0sMH0?y^BRWheFIBW(@uq>C){fbGk$G5uYrI1^z zv^))L`Oq*ucYR_&>KtSa;vO;Evhu+{@4eBngW7R?zWFsS)BE`?-6K+!fN9;t($RkP z1*)<$X5uqEuMtSDzoeI{6?v}}UZ;g+9elQYQk$x9?u>GaxR`lMQ%J`2Y4>uKVIPAE zwCzMN9Fa*LA?wiP;%0;XX?(*-lR@t)&SGW^Ks?6BxZjxNj+nC$!Dv#Rg~ESD%l;!v zR#kjUY-8&wW_ZYCE_UiJ5qnpz*F_s5dpH%%tW5IJx+G;~tQ!00@eRtdSk z?;@yLrBpU>TH=W`7jeWlNKuT0m@BUSlC|#r`n5o!X&M5uSJRg)hDcq{5(SR`HI^f$ z5Grj~7aSY5MYKM^%$dcdgT*shez1*5}d_|+fJAC2vDVJeff%JDq!yP@qhxg-AmViEv3J&N=L z^;=%59wF6Pj+IY!2fZ&Jdy7WC00trU!-!Np6Dk_20X_xvp$sc$QatyV6gN=ko0ODe zM>)<+ibSv#Xe$etzvMc)A7T~mwLDsHhW2DnV{)(svke(Nwt{tqw}-tBzRg}}*Xzhw zdN?0H+qFrrz$Bkg%}vdM>c^MYAjCox3c0Qiqs6`5p~lW+bTCG{ZpFe^#Oxbbh0hMcmO4}LT7u2i~E z$qbJ<1?kHpFva{JHrI>)f&+oc!9fZ-1_Jarl!qfOnNl@4L3G}1Q{&+qKILbQ%B350 zs>GX_G&IV7&Vtep-|!NLQM-YaZ9!xDR}}5OYQtuI?btEex<06-!wo>!D;Dn(Xef;xl6@=O8f>vG-Rw-}cO;J(w9N6+)I25`k z)1+)WMMkffj-< z=QKbjWh7Y0H3tSDmZZcKr5?yUbwINHW-{|m*?;$6VLsPWGA!yqfb|vt$2m)ZrD9cE zS|5$MLS5y+d)D>v3pgM~g`2`61DJy>$6BenQ|AN9CU8}_d4r0z#?Dber$;ORYr5oi zU=LUx|8;bhG3l&8@J#VL5HH?K3U&02BjDJ7j`O`m=7f`|rU z|7#R;EwieQZv=S(n<$zb&$V%mXN>=7X|)fz#MBO~l;sZQKr-t0aS3)iqT3gO@yCwI zzidpPUbNxUn>HVU-*%Yle>WNfb414){N|E&B`9fz_c3`S~X^@lc8?b0=sUFYN$7ZrI@ePnaGJ>9fJPpb7u=~t##1vDH<7yvnu7|Kg&mRD# z?F-kql6-Qi+wZ629!L%xN!?{waDub`EySp&wU~mUmQDyw$O}VgH)z*{I$hw`z^d{U z)k>wIR2u|gSw6q!1~Mmb#|mR!#zC4=&|O{`7&`q1`qC9*qdPdq$gY+nEsgy3Ha^o(vX8!4I_%tUWK92L!E~faXty8;~l0`+>~W01T-H z{~iU>kGmOK;}B`Z_t;796h}}S7uMd+pPSsP$308ac6z6WH(l`S~)8OXUk~?b2U8WE1 z)g}lT#Vkycn~K+2By7i99o=W$B`%G`V7~dav|yMf`WP5Ui~7#OEGWIzGc@g6_6Zwn 
zr$Ohr+ZUyJWYdEuWg#)F_#NMKx^g+fHP5<@Wz>%k+Yr>Ey|)3JDu(R1s#gMqOR2mR{fr&m;?B3{HuPz5xp_npu zz4wnjVIYAF^SK17sWxtGs?qkFjiP;rcXyQBXGUqJVJV58%Y5tBmhN@*0rsVmYC=Mt zh;-%^TAHn^SRnWhfa6itE;>}GLcBiXQnY=}*=DQm{N0tuEx8H!)3FAGvsJi6#{yi& zdl`hE(6**ZPd#5sY^vd53dlgyTN{ki__p_i6h>sn^`jmLw}5Ag!KvTCPlYqc<0%NVg1xDkr+ zhy2eR2ow`L3=ln%WFQa`uW?21!Md85T4Bk+HRTQQBHJyL_sgkEAg`q5m~@9t>$)lE zC8Le^f&~xw<|T+%yfg40?1bG<{vS7m!zIj#fA8PY^YaC#2l*itne|vUTJ~az-V>o* zx;aC^Zc?gI=!@vXkrlw1`|Fs)d?^2`ynz?R+&2xSCcvG2sHpf?s20-*N#Tz-tZ2m0kHLL(i0KaX<_A3&o4& zy8QslQ;2i-gAPU;ev!V@vunCOezA~Kuy0EK!MkvF*3mX}CRcex)jU7WafQH4H3Fkqv{>>sR(nri;IgA=ji%N!r^$FPdm{cAr3&etke$u^jH286sjQiAApWA z8IN0M5|Jztro2Vg*DAYCcj4a!m5}_?7N^j437~lg|C6YJ#P+>AzMdcM(tqPk!|+9w zM)xJIHlq+KPtNouQ z^T99nFl`vSZ64v_)&Bux$IFeY4wDpRSh0l8U-^etaWAI8DnNuML84l1oTb^O`0_aY zd&}*f&tGr%JQ*f>v=EN^ueII#b<`)4_VATWmwgnBo@M3N<@Dr@p5px80ToOnyQqJo zpRfUP_)2sw6O%z^+bsZ_e0A2H(HKI z{`CEe^y$M-WZqEs^}nBv?UUcxK>sTGTv%G%cWHZVXzgfv>aOtaqT!AETiuSE(lv(? zf8_BqCTt!4$^Cuk%lh?(>evSvpM*-sI;TeW6eBJ0ope1#Vt1XcB3cviOlG1<@ndI> zhK{Zh#I%p#Og-Qn*XT?=C2z}#Ctl00q(C80DAjZF*}>#M9At6|Er+4;wL3IM?I5?xu7!&5HBZhK}F;92SB{rC)>YK5mE8g z_VU$)Xcn2}#)%9Akl|v0s~!$|6;2L`!?~If;RIbMT6S3bh@RW-j)l{Su5}}ItBX(C zuNCKKefgYHWR)rf@*E3$0)^Kg5hEpfCE+&ud8Q0vgZp?RZS}ceL~W+78rS`dGF;u9 zE1swmF_%<60;zT-L|;!FrkJWl$M@B_MA)R&c)y4+mH5IrJpdTLN&EJIwz?WN(rAa{ zs~2(Hk1eTRdDs1lPpPE3$5zz6-1(4fu~0+B)SH=|&D*z&hSs}i2m$~lBqhKg@!hVV z|2d|`f~dY8R)I zS*P;u>^pu(Cq;}SbWl_$GB0PG=dR$qPG^Pesbds@>oS&Q>2ICZ6Ra0L;s$15kAh78 za?uE}8tN^+S$8p5x^?jS4(yNggN47SUl)|(l0Jo>(3~(Gy_|AFE%P72r-f|ipuV1w z#!frjORb<^dXFJC<%*W3>5l%6q2CEJf1KNI=d;^FD$>M~L;*$XsjnBJiy6|gu^p*F zTdUIsb{69BnGOZ*Fko+DOVrWt0Kv*ne(9S!rs?;%IFHBu%hnq`y~u#HLCXB&ikZ~M zDzt~~@)@FTk>-=Ojo%FHh51{kEAm-;VOUtM*WN#bTX1{SnjuXs;hHrs&8YT$b#j_f zQiG)JoMKMl9T0oBy;nL{Y|=v!mbX!WzxE*7qmB{ z40gwg^(%{Ic?Q0$a?KBWW3bHR=+8^AD@|3GzXR=C9=P(!Lvd+m<93QhN3E^=(6p34 zuPryN?uVGRpXh7u)MnAom>#gzV5gN8(Yty@6dEf3`OImgdpr0&Dg^$|@?~x3K+J{4Gd%b&?^X`pmYh-xiU~wA=#B<};yDTd)t@ 
z^6W#Ha(qLdIQAoLpf0_!`mY#Y&+(G`d+ff%p11BB=exc7x;8KK*Fe%t?BgS6x)(Zw zkN?B?()vyi`R}J+&QrR_QEBhszZ6BGm%R?sJ4{?C5yOVA6llWWNhF_f*`$YFx8Clkypbr4JZY?Io*NHY7biUP z_2GIPu(pP=S5ElmO7dhT9AR2_NYgioe4m@Xw@w@#&vLg1eG%t<+2>Sxc8{L*LsUUh z>A9KFtk0?GOG9T#9S&7RyhWcCK%*~hcewz|36@CguJ$zzmYSF4*4XeAq?&Je@;|fx E2hn-)!Tu z`+45)hxaddyH3sNGt+14ny%`u>7Hx;t^C^o5UMGuDgjVX005L{0sLD>K_>)Y0dSrL z7l8k)1OTFEApnp8$N{9!N(P_<&;Y2Oh32`&17H9!{U7B4C;~VE{LlKoM@4`XKn)-X z5Pep)=ejGv0HE<8=^Lh}* zzcKR+9s$3lMbk3oGq4tvi2eaow=0_Cdq8VLLwk>O)p@vO*EMTEW0Cv*QO!hiu#b_1 zME=xjv0u-a8BsooZN1ZN4|RJl@8|KsJ33A$$nfsH$tHne#^C->lGg-NfznyiRfT01 z<-1;SlN@Vb0mlm+Zn~~+wg!&gc22hu&X7k{=2X>#F;+*mw7!wQ+pt}f`J2BoCq(m9 ziv#hB+mFkj%8tJvC{3bCa=W76Uqg33VpYj}ftf96!4}8HCgYn%9H_g*M(}ds?2gC} z&56UuZR>0NqNL0=F@KC&rJ{z2xTBDN08my;Ms?oEb}Xm+tV&oahyDQ}Uv6M_m$>AT zEaHT$Q0U_(9m+Ej=JLstoP+c>DYh5o0?gV*d%=!ClUYX&cr}Uk85)mz(rw2NsqNHT zll<|wnM*TNC%%6gxS@02f#NfHEuCH3Av*#>Gha`G|29aN2fTTFIeis#@*uCk<(lo< z*wg>Ft5tHn?Nt)r&>^8gQfnEx<+$*ULUo%XNsY1x8bzsaz?$H~2f#CoR0H<1(km8s zZ%N9bdfBzZo+a#ytwk`{KqADl*J*nH&(0Ui^;>+pidQwx=xoLzexba03x>sn<@Y@! 
z@X(NjyP&e&8>3A&(8TSkf!p5EnQM5q-D~hWYjVim4pHOQ(PJH(SOE(E<0V`C*5NR( zY*NFk1N*+GJKgQ08_$t+WEV?GU{LVo|3G22DZNowSDU*i`&&BE^IiXTb1AT@nkB@) zDvN_H$sq*Ip7uy`v;8a2XuN4qT;?*~iJ~_|lQMSGovGN0qxB}0g?8X zsmt>p=Qn1bhT4() zo3-aHyhjFwHez?eKlvSW-$*Ki z6b`157kE2;f;>`2SR9-AkNr1vn13+mV&AEY&nxp5pF*0kcOQJNs@q+FWiPj@Boxwu zPauJ&ErC4;hS()1l%l9U2QygNND{vQsu z{8Xno)?}rGH;nru%k4?uv|5F0!8-Vc3w{i?H*#P+{s9E$vm#aOkDn%vxW&)dojC3Qku)FV*j|?{?MO^NEy{Yl zcApIHZaw59tiap_83k^><}PS0$te~_`GG*f|Lczmn;iYMB;QZC(#w)43mAnbcOtIf zS#s?;>8dQ2!=OLQP*Pz2+m=di~HBrB3p6a5|v(*zRq+E)|lCC^XiCbMx{n{Xvs_t`4x^5lgLJ~ z3DYLMNSP`7FGdQczk{77JJuA~0_ko9_(9P2dIx=S-Y-GKrpP1*+ARZ2IK)huZ6|bn z>3!6Z?i90hhK#FpBmY!?cl>q*y5^g!3PW5&_00CFjhhE&vQTah2}IjNFb?ns`x)6m zM`*n=aQzYziqzpno3JD(TL~?s$dW?$+0)J!UFS;Jwf*>M@ISw>NE}_6iD#<$O(FSr zsReHIQbJJhz;b&*+TF0CX?cq3gRfkP1atn9Y)f@Joz(AN-o`v6>vlE66gS_jm zKFF9_s;uOix0Y=9!nx+99VRY4n(qfMvkE@@aE~uETf^pASLHc9J9yBe@HU#Ma(Qa> zRG8qm7N8yH6Nw(v4U)kb!EOIJPBzfgF9MSy{!k`ysl+MM>G zpsd2NC4N=r6LD3tYTwiuCck&?_O>}Wg{Mn{sX;&E!HcYXq4ka-=39x`&PY?*>E?`u zgFx#3VLgD^N8IgK`_1)A@w{wiu$?IiqBit#Mc2Trc=ltL(|d-)pMTxwJPM;#5wGX3 z=`>F_&O6u>);FWZbiqgS*uER|2*Jq;;o91u&jOJPUzm!XL9nF;kaW7%kSS}HgdPZB zBtL$9ugck;0A5gHe&Q`n`08+f=t~FM@1h-1q;#?2_B>9=?*GZ;o{uNH$(7GPTyo5K zQ_4tqWwRGEJS5o9p5>bAPEzm0?%mpp{+4%yqqE{#KnyS_uNsq@7;dOJH5Z#U>Hg%o~>@qKUbNM@dXtF4u`GmR9ofEUWw8>RTYY6%t zKUk#M3DXFBE67-lOgGN3E)dBQPnQMvq7}hC-3t2Zq$hspS2KUrvl7^8+*Kk@Pj}K6 zvVnP=Q;sOJeQ}63n{)S#?>1~(`GyAHT{Eu0)YeG6KB?4;vtVv)e0po)xZyEKEEX{T z4N!F+?CH*tTe-R(8UHqU=JfNeep*?1q2C6RkAGLiYe%GxDo~@Bu=5z+H&Tsw_v_A& z_SK!M>w#~3yDIW8WGTzdkYRp%%cJ01Zp~z2w}eG40jqfM^+ucDw1FiiD1>|y9&?)W zhhR<`1TRX+;mTgFhv^1yjryGuB$d%QJD6+veG7^D=0Y$r=c5`NuI?~rvbnS*ciX)G zJVqDOx0buEF^^+>wUk=N+o||PRRm>g54p|!tI!T2wk@;@yBZoeuO#!>YPxd1Ke;Pz zL$sj}N8}z-WYkYGKPd{vDDozJ?r?RpdoB3zWIdj|nJ5rF(DLRKobwLC+2zC^eoB7} zwM2?<-^H4&WFk+FmsM%Ir|k^AklM?(QZojLCH9f-Wuuu;xd?^&-s z%E4nP%G{oJl+so;n(Fz)P=?=pMB5Yat8RcOFu)lu_tZL z@d}_tt?#0a9EfSV2~-d?4BR^Q1hq-kT!zz`mN4X@BWV<|;SWfX^cx6+p0qPrmhQQ-Nz)UH>4S7TK= 
zx@MR}hD%>%q!=t;EomC9_I;nOAQT2U%x2b=J@qgzgDo$NOl z%NAOpAp?feGL)kt{sz@75_L@;U$^E0mJ>tj?%s*EU%odV)SGS*hT)t}kDdKZSYH^w zb)(WZoJw`GOS4}PTMJ0n(UkgNNX=wi>-b>z4}Wy(*Dr$!!ORT z@mg}SpELM${K5fLKyNZfa46ETyR^-^YQM#PX3|U`LMOhb^`{2=D+>!-LA`C;sxY$6 z*9@F&+g91(!)b4}Gk-$cDSlS{_Vt&?wE59o0}fMm6NaX_?*5&!Xt}?t4eZ51bON z;1Su}81CM-_zY{5LlMnITUkCGg*$S*-IHZF9I5)v(Qctn=5ch!Hr8&RaueeMx2y^y zSf}f$(P{X#IsSb8*;(T};T%p2hU!?m5b?G70LO*{+WrA_v?JlFhc-0@S!%A~&_)(> z^Hn@0UqSIyy&9jCApd1ESu0=jWNyT%H`8aRxq%1C_B+q0N7{dY+yv`DZN;Seqpid| z=Ea3i6+XgTGTr!DKZ^Ff`;5zM0!q z_yJGS9X+qDp_t8CHOstozg!`lj;LA;t#`t72BRIpJ%PQeNZW@z_!ts6J;5e4lhtaK zXdA_8oSovwA?kP}Zg{yiTPZ31rE&HcLL~Uzp}QKn~<-)<@V1R9E+3S5-~-Ny#Mo(l^^Uj&A7mfeK#IC4fSXr$=B%DV9UE>@T(Lm+>fEbgwnau5Jt zv&cj&dLQFeb&snz1!rNpmvZSf#@;?i^dNz1peA)HlT;N^MTyRb-o7 z0+W?*;d-LqXS47K|LNnEkLOaWn2j&Xn0%NQ>U~)~ z^WEo%P(4r!l)t3lUv^C%QJb!DTuYG`b%gYe;S?T=(`V&1yX)S;d8Iw}I=r)x@UvGh z;XX{|ZmX{E&y&;}?#$4BA`xf0EpLLxBeAswvJHPa-98G{351J%2R0?yWok7WnuK|v zp_0eaX?H+WMBX0S*+Ohwcu4K{%nwo{+SaXG0Rf1OIH@)p0xZ`Mrj8ddQRWmx z(d_SA&@K2{az-H&l_0qV*xCdoTBg|}ANIO0zs<#rzd$kwpN{8H5%f0FhmY*mpYuId zUtOV`sN2eE6YD?>Kb1BSysE)_shRywWC;oIvv3C&Bd*zpenE_gq%g-M zMRWq1J?!{>=?F?9x-N)5t|w$9SrCkTNv5uxd*|8-Qg|`NLxU6zj$G;Hly~DxD!lJ3 zel++ZFcTM}Rc}S^%a;%KZ0q6XrgN_hfJ~O(=s|i@^S9ra{RZh8z<%uO`Gc2_ja`@j zw4ztk8As{c7_Iwv$sI*$<-`2U-xyfAx#7iQ3GCC8E|EW*8j#H(lM zdD7f+s}Lzj3+``s)zlIHZM);_PRs?9LJejFYHKVf2-@&!Uq*~h%yDXcORg&!O;NG> zT%JzXA*HiC`5aXEYLOE?)d(Tl3J-5$YcTe>rvfDx;mghXoXH}^N|`^fu+!-+P|pCyW=yQ&;fa&mG2vd?KYPe~Mruj%Oy8A_dwx)Vfgqxab4TWS? 
znVO5?I161irgWXFq==Str>TX7g_XIMK$xD(x9RDg>i1@XX2HYdHJW+mtV!TV731Jy z+vZc|k0ypVPEoU97}l@2v4hJik^Bn!+aF`v@48#Q7C3j^xO6#vkiw?O%$dP~Pnb6j z@pM;$?GDFuE8xd|J#5miFo9Qkf>+R@{r%aC*SWOh+l-d^JPBYvDi#t&Cs)hHrWbR` zSVKN+gk}aX@e$g(_*2=Umt7cKQxb|DIAp_dGv3Z6$L4uxofkD;*63Ae-)ekLwm^B~ zLp*+Sz&EsPM#{Vl?T5n7Znxr;(kZufq=yTqK{QA{TE+^j%|#hge6PeKOFRKA!3n&1 zR>^Qo{rRx(K^jbAwlg~I;_GBJiY$6U2?arb+a`*1`UxH5m>P->m*3+&q}K@NI}t4y z^X^E@6w)xND-RCrso*5+TxVe(@=5>1!nf2iG3*?C^GUlbFv0ie^M)_Aq`7B>R%$xI zq-j83isRaK{$(%c`;I+fs!3!8K!BSdxTyukzNiKsi93?PT*E^wOr?`2^b3)P!7RXNxb5YzjfUl{SxTJq+d! z&zr;gYOgrBBlsefB*`r*(nE88R#U1qobAi4Q+%48<0#NHv(Mvv#R+aP=MxfgWh3@` z&Y3ihdIFJ(b2&UE_b;=xV&fw0{iaN$mHj>hYe zy5OB+p$#q!TKqi$fN^G)@FT4#CJh7HSpxip^D)~VPeUx zdF#aBGZGJnBYl|k?L!m{S}Hwv6+JS3DxTGxkyT|j;$_V&z#C$4ng^SuhHhS@Ld5f` z=|B|%@6$6KGu68N0wdzTjVBZaQg=g23%!4%TCZ~H#1$CY;sQZ*dE zaDP?H9WntZABjJL+MGuTv*t#U^zSVsWc2X*hM;L>#wk>BHWCMe@mzAYip3ggm-O}K z`X*TRz0!c)aB*wj1~s)F?K>Da?q+B27WC zh{b82&|eda^asIdynR*zOef?>6QN%5XX)@h!Y;1mXNS}0`j#-5u4eeANbqz4beYsT zhhFM_CVdfL=u#mDNl%4HX2j~{A@b_vVVSmVok8nm>q;!39f3`5Jc@V@0RBHfUHmJC zLJt0T>b!}I%Wm(GUnqlj=j&S=B5O@Ti5|@J7w9FpW?DSAtrm*s!!IudogI_dIWjmv z9o;9y#JvaKqm6v!a_7_7`}%Cld27zb~o^Y0~v&C3hJRwy7@kTkaQf% z$34zrV%?uMdli9OM0OE27@a{YP~es0=T_SlygB{)x{NnFdaxb7EHg!la@HzVHBf^v zt9QaAi(sR0lqWL|5mR|At%@?d+O8^v!Q{;zhv9{_V~W$Wkl#orQWeELZK@^8&{r*JVHms?$!S zdBZNxjJJo}s0ym;Dzm}wBbiRPt@4lLke?o#lew-n6I%}`B>?FQ3=@n1Rp;|6@(=Dl zm+IRLUQ8RBz}itx2q|x}&+{+X;+=7MKiCAiB<_#kr)J`dR6#tnIqM}lPi|n4ce__c zNg;Svxf9bP9jam8rIFQ_JJb=53=+^TFyum?_HIH(DvC8Y>S(#4HC7BUm>NU3|s z8bozrY)da4PM{EZF1=Q;l*N$-`#>9?mIq@WXt?QeOD*;$?Gl{z*ZiDx-19-g=7RAo zgnq%3O@}%PUEx3=#&ub6q;Aip2Zd9wX&r;u+rf09%0<(k5LlwK7+V;7Zd1M}=b={F z3hiNV$to20&+F35-#jJVTb=!FV7$<^INy>d>LNJpR_dTnXK#(r77*a5)jT}|34MQ< zt~uAzHF4STFFw&VH@O_4rrc{~NmPx<%C4=grcXz!ZkqHQjVQUyueU~(Bhie_t*Kpsa!WfsEFD!}U7OyDrZ@OiT?xT;J_HK%gUDU+F z$XfEH*BqRzPzjB-T(Bfht2F0vpt;a=DuCUi--#mE7H4IYG!?59-t82bkmw#$J-Epm(+AqIsWp8uT;H)+#tk4pd8II!M-h>0D(FtSo)ZcgWRnElBA@NtEiW 
zFv7(7;fzyCv0Y$xIjVb7r<>rCULc6X?mv%&zNt%-gAGbl_~-Q?1*-@LNlG47`#D|H zS6L=4nGa7ZM^F#$dt0qgZNia<9#1@lYHfPXJYBBJycrZhzs=u%=>Ft!o1vzlwI0Ed zPUfShsi;JbDZLMeLauGdi`y{T@^&PppMJdtEw8$lIo2U94w`^RLNddq2~yDcTe)I z8_psEN{4X_vctP5N-|<9Y?~Sn{t-k%*&T!6QU0*7%YN?2r4_WQf&jem%QW9}IFWpy4_Ovpi-x-*A z%<{Tz}Dy7)`y^9US7h0YVLq>zRlr{E>ONp<7MmBk(e;3-Ds~^`Yq`6De(o* z;>XeQi9-wLx3EQhQL@!iU|A0jQX=t1?nm8>EFNfMaPH!;)MuVzq(+54CDcb(TY(5G z|674jf~x*xC9VWQq1PW@${G({WiDzXu0qXA%_~10jq95hudHpAc5va6Qn|rl5y%&`3Xf@Mu$wP2EYP=-Bz^y{VeAp&kj;m9gB6QuQ zZB>v`D?qZk{ELC>D}szUeb=gy8HV6yUUAr>QWRTwOLHF0?*-{}9B1KuA34G4RSZ(xA%tN*3pnv~6$hI@=F{ho z)O$oq-5q%cW9RWT1@rlxeR}-Pt@dPKF)4zQ+*a`&u`U2=WvghX;pryqSn?2pCn3~% z@=AzLvN{PV+P}JaH2ztF#7G2SnH=i!pn<}ydN8`^~L#MD<5jq45ar zeAWJp(#^b(LGp|?JS!f^&k|SSp+Qon7a$ooM>d~AtqPI6FxktSir}+DFJB2~gT3!S zsoXS8Gs?WJba2hW7GmnCdaUZ~gn;RVT#J~5eKgs!o_XQ@z*ahvL`<573^UiUw^zja zB}8X@w1m6-_MMZT2h23RSdndBI;tf~X)|f0Rkg)& zW;bQq)^jnqL+spg1jX^{;?>J1Zc=t32v;Kd=~y&neWR-yy&)=V?sqc1@Y0aUp`+g~ zDEKN{z~)42#m8*;7wPyK$E!$$adgzfZYrM$!TY9$48F*GXWvEJpg(ik9hd@4^JB|rUOLs3t(-WK(r>d&x6t+~^lFS=o$a3-za4jPWA+Jb zIn-7{Z}et7k+})sT50IB*yCDoG|?z(x@!~@hC|;s5O(#`tGXa_CC}1Ua~w&Ua^Hg= z#P4-zeS3BCOAP7Ly8LZL9M>XRev>Shhx{1hw70qZq>Pzbm;r|>S*V_~%)hx)g+Y7R zq-0F)E#uM(X4WOF{J~Hz*A#jMaC!GSe_hMujKQfMxQ%fKY1TXN`^I!&}IO=|hd8BgbV?#oIgsTzKID;W0?i39kNn`Z&Kre~aIb zt@6~TbL}wo($rgJt}1tkp9eHjmCwl$m3|x(1b#!gO9iD9t6=rBt4&qS->>oi6mydb zA@c3YGybz2!wjqNiszVKyC0KP8tEIjnm_({=;b;!_+Fs#bN-l@#HOX^3)bq4R>?AT z3Rl*3d$Ml>*3GRoWk2piZ1eZ$Om)G_jt^x6US%UBblQsAN(c{0na_)ppi zP1y6X-rB8;LbzS4<9_SX(GL+jCqT!w3}3smz^T*KZA)d-3Ro+hK#og89R)TM#v{_PYR!GokMHF6ES6|-xCl8_GiJEs@4Yi=AKl}S^a5sBQ*s<$XA-sX z4Y%QemLZs5KNfRau8Vs+rM40_;SlpRn20_T&bENeY3U);*|+1M$a<$O|G{a}oI4fW z1R3hH-0?HJdTkCa-#Iev0`FFN1~tg)f#V5}Lt>tlb%ZEc$fD!CIWSZ2F*`6&!D}m+ z^fXo#h_KqeE9lcP?l(Z7$)qGVb;^CU;&Bv8)91|$`WhH;km@tWoT{bzQ{0#}dEp}j zSY3PTbN+}SX#Mi0H^@wN;oYqv`;2@oZ`@&nV&O2ph(r`g+w08kAXuku`YuIrJ%s}9 zNpUtI-PDqYsiJ|Bj>DKNlABj`{>a>#gUK;D_=)fK4n-mM+b49+_jL{g!^|mQ{b$n0 
zL4nO$trTtnf7^+kR#c%bg`Q;kTqW+4iK0|HT3PN{I?MCASGg$WliRMb{J~8yv!D>$ zlu^Zsg`YFw`_G0Nmu>o{*cldZMRLeA22NZ2nr}6iv3bN|_>_HLCXoVJQnk4Y>q1p~ zho;6{WFINgD~9=z>E$!|hP_i0-~9J9f0vV9yAqx87&kFUsloPSZQM`G%YZe_h_e@` z?n4e0ZRF`R2^(5c*#$UD^9jCCa++Z)v{>K)F7mU1g+>|}rq4>H7*tg;Y{Y6@{f_5x zyF(SPK}MxM6Igh01kw!9--U`X7j8;Mm!+zK+6*{zA_mt2y(k0o0>&T&FoS2Wl@%wfh;Nzs-4zLLd zt#eQ%k7vD365HO^QG(h?J~$P0A<=Ntd0RJLD+JAF{~Qn3Mj4qo6@Dxfe^LWZ82ynp zRweXdz@qq4dS5ZiylO%s8I9@V2#3DIox3}4Gq8|=ftJOlxs=@vEEpptHOZJC{(7O< z6r>B>hlCs6QtKb83>tB4jq1sgjMWUZL;e9+rV4!Jj?Fl>RAD^T3ls!}dSq{MMW!P~ zWoc!_$Z~9(^dgU&9==1c8F^ouX!UtRSMo1g2I`eKC4;!xL|aM#mM0RRwlia0Fx<#_ z9PUHyapaB6@@(#EtVf%^;eGRGr=oewad98@P@jnGcw7Wqb~byaOw5dc?VuAefUv65 zW`-#~iGxFr}W0nlJ`1rF|Wq+<2G&l0~v~CI>sl<&spZ)

KFWX=U+6 z^Z2FrMJa5~?2AI(r}wv!==1E%9vN!H(oFL23iyKk3mD5IGg5To7c1~T&)<1oLnfMy zRKi?qj9)*KW~oq~NwohLY4$(PEIJ0}|4?R837@I7G5`QxY>eh2wcP=nf<4|(NCOse zdN?L=R=l%T?CLg`F%nMk+~%cYj4+eN)O$sR{iwsbDEo|BH%}<7a4mCKCC^8c;x@pu zp{no%(hN3;Ng++3_wl3~*>SbBa^Y%Ucc*oRCEM2nrfO$=ZOd*}E&)n9-T;#X zwB~A<3>stCsj`O15EinA%Y2m?3RjkT+Maq0cnDcvCfSjTt&4~JFj~B=6>I`3nH=ZO zdqU2!(Z(D&-=mTVancJs&M>(paW3_rWTEAxY5Py?yAnk)8Rs4KvKe{mv%XRN=?Qs5 zh(n7MsBHoI(sbA^)Lb^t&4UAkgy072(eV|n$pvddw@}-$$v}Uk^+icEx%)d=U&c6y zhphfpMUmW)YYLZcE;vL)ACSSCGlg%PJ1{XTPsnw8Kh+SfD0(}Z4z@5?)a_G6r<_d8 z2}&o^pdU1dV&*)NCu9%g3rCG}{`>-C%jB4J>6fCqrZ6~MfCOEYuioYcRX^^2c4#iA zM|c2(d`U9--lMW93K8Ll#T->=q#{zbWNCIb^9n>@1VG8@bIVpKSrYTvXQkL2y&(M& zgF)C76QAnxtRdf(Kaj}SGv<&00B&nv zdG6Xa15p7lBXG7164OZsX~<}6NWE`=d?(XIZ__gRNE_8vJg!ifeq|vePuMK>P*2$dMn@Kb(uQ>SLJ6 zuoyWMRWWdm0j`MlV^jcNU{hF%tbxj;SekaVPVg(4H#G8A^wvHj00M>0KtR~b%&B>7 za@$oV{7@sBD}?VWuN^*6Bz81Q6THiYOh4y8GYKZvdh8qnCf-S00wN0t1Y~lXWB0pS<^S@UmtEiX zQ)wRXDgcd%>uoI+cS(eYamAH}>8Ut;#Sn8WPRXKkrH}ezuqT@-syL20(8~Dn?hJrp zVQlR3pNDMP*ju=x_u zQwwGR3_|=97&PzHTv!|t3-1|}Ua5$8q4vJ<=4Teh#>3I1252zrUE1MUIz- zK4rez_sKB`M0K4c%PW>aAFFq$f*9+b$uh**BkTGaogtp6#` z`RRYl|7NWh|C6@R{!3e<0CW@P^~|H>u9|UD#V{co{jka=0Dxu5w#G3Q?)#pDG=m+O zXOpFOlnMZ3n&3=zYFw(rxX5r*CtMoI@mQMp0f2N)M$^+%wUF2LS@qbbze>mSP15y@ zSq+|V(?=U^6e|1l`|F}hX-UdcfphN6(5vUu262U)ccxY^cj zk9)M}el36K_h?QVdlccxvys~eTNHAf zvZkYl-?|EjxTWu-t<19$9?jZXIs0RR1GAJ}ol}c#oUFp6+-X54gIKND-0I}5ePpL{*3O#$78T~jIiwnC26$3LnH%1*!i3VvP!-$cav%yfC2j$3? 
z19-|*-(L;I(pTw4reL|IsAI4=S*zf}vC>Z_KUTKY#E$}~-?NiaWp9LarJ7)|!=12_ zpQ8*YTdsmzF4N?M&~V}p11O@V4yv#_CVyOX8PsrVMo%sdS#n#V( za{zP+N%g|m9n(=yRaEt1lVw<_$vDYi9Cn8VG)^5AAUK&(k6u@nP-nKJT(dc)lO>Yf zhzm~6g+&U$VAq9Xsky1@4I7akgoOi8!*ohN<bGCmBl4WDbH<4mCo$mNXcp;ktr7>q`ir=s``aZfw6kanhm$8=gz!yK_NQ*nemF*?I|N;C4u zqi{a1ptPxArs68vk=Z1dA*v{I*f9W@$zcwoEe(XGM5?|9WckAYQk1f195`^F_dGVb z-1E2~)Gz=U2MUx?uf~zkC>ggpMO^?j#v9jBRgQ`u#lQr_q;Y`en}IWgV#~O=h)Yz> zk3l{Ln`CW`j|BNC;!K9uKTjb1ga9-EDk=&ZIw}e}D%yW9!Ki2ebPOV5LJ|f{QhGi{ zeln&v0hzFPuk>H(o4C8kC9j!JQ0EVrIAVzGp2X z<{vG*?Z#jV?TghgH+Y7oZNJBf(fYG+RK$sg$L;19MB~ogjO*sa_-0?z6>pST#6jL}{eh`) z54s1sXsu8aOK&yE`dG3-O&-^7Mqk0WK#g4scXXmKSejocheiCl?p~+*9Jh$)Sfn-bOX?>f)+Wo^;XpvW0xYQv4y#b(dE-}2s3 z@li=pzeE-@)ks!nDgqX2zghQ{Y*I;DdB4??BRz!Rzty&f^yTi2I=DKPUvY-kB>(KV zY6dc3f$U?~4Kx+ua1yh(6m>Hs_4<$QfnCU;oOS;##b6_nqW!95z7$A_X;J9bxM-Hj zIQ7?&9cs1F-fp`x3BgJRlFinR(-~L0l=6{1b-^A~KGQ`-?(IQQyVRMAm3J&Zx$RY? z@SAuVo*iiaA^iSzW*6y9sFY`Lug&_GhnGwv-?#N9T*ai|7fz?Hj%bVt@rKkmviMzKE0_9 z7rU{e9C7cb&T?$$oWetKN=yxl6gZZR4x!Y!xO81DH3#}b`Ontp}%$@ zv?+c5)~jiw_Iu37oSXblQ>I_nbRN*M~gxxL1$Lw*GoLId^t-6@5%o@>^M50ODmq zFA?)PsV-T=KZNdGCwiF#<<9SZwEF1grLGZ%;ALBw*t=bB#96$%>E`TgA080M-048< zS#~zksq$9(3E?k^EVvn_JUZp3%(d!mw2O7xWX}3nZiS4=LMIE1t`YMhn@w}jviO9J zZENQtZm8Af4{_68TEy#z_7U<95B9nYy-3`b;51k^caH@xP$_y6&?3QFi~SonaKWe!+C4y!rO6u0hb%y)wtE zW4Eqj5#G^buP>VJ+7{n)9+sX0X+&3B1R^(<7Y|Bv+0%O9zr6E8#h~EjfUDUrwBM-C zTqhR{KX=Z5;by=GrO(dS`~2tN&9+VI9Qo?1K(4w|t!9Yx)%i`Z!Qb+&cw}=D ztkK4C!h)rAjk|qfJ5Xo2=7-av&r`i$W|R>2ia|Oj3AA0gO1kko!sBJ)+|=B=^J^jt z_a`-1W}ntpVIJWcUTzo8=FYfp3<-=#%+ucnYg}ZkJxX;8-z>aOU6E$J7n`?q2Hi&0 zp~iV8e7fFlmsx{hqq!ufU@0N)oiG|QlL{KeV)y*^J*mBQ{Mh;QhWG3B{Zhr; zz2{hnn7Neh0KyOUj&**gq{a9jfG-0NYR$C~>pW&w=>r658793@d1biSJnlg^_~S1E7epOJKN3~7AdIbs+Rg6;8=QC zn6AABBror!n*{8wf$(VhUtmvrZJD_SXEBY8JKkg49hQBQ`H&WHevD}F=$e~53dlZ# zp0x7+1E{-}yLom$`q-UOe0i}(s>)knz;lJq206!KnQXSvk?^DZqTT0Q|Lq*;`p{&C zB{XL;DCXvP)vB3i=#=PfS0uhv*5A6x!YEhQV|)2-Z(N}TyMpL$o3yJNepg2FH2YzV 
zk{KXxxv%E7^$Fuu&dVoKO?P~WeBxZUv!yl3x-!lt`=4~xxfP_lccQCJ(t>pq~5b>(ty;!dp~}8Vl>I>my5V4`5?w*SXYrP;X@gkkUGZ`VIM~(iB+31wmj# zwK0Tr1?v6;+Q!ej3O1|ywdis_psMNLv~DGza==s$9hYTa*nh}&LQ2fsAc*Et&&a@} zCC6YTBP(Tw&e#4-lE~>?ROdde?YPdp+d3&BPtbz))07|?F z(X;FBZs+>J99YHG<+|yw8bu2C^qqOC{I*H_8kTYG?AfLY?7^HuH{y+rc1o%X!?^k1 zAx13M0nsZ@nGtS=D%9ypAF4vLH7Tlgq4CSztRZh3Dgp*CTPFDVMQg?CVVHjlk1|@T z)b|Dwq{whh!qq**Ren?d##0Xp$I+cOuIgOQi~J?UF=bJ%&*QB2N(If0_01l{WIQe2 z&iBJMUB&k+#@_sE7qHv1(cp=Pc0;!*`N8Lfe5Ja-r6EFQiR#IZ7TMZKCsH48~gWu4KEdRHRyRTa3o=gg@dhgnv^V96=sqeY;|UPV(Y)rYC) zIeo);)YWC2-bJx@HRw$FRlE}0d*=(vgc0`_uEo62asB@Ly{sd0N&QU>k)wN(#RFWB zRRAmH9)YLHnGKP74MJd7v^P=R{X<-44xQ1^b(iqeQ_Jeo_R@T;w%PGI3QM&%J6hxF z&-DQ^C9b3P#LIN(mx-e~HHyJFZ2!Xt-3 zX}KS{A`H7rJM;UZ1-(v+*>6Rh#-}EwgnqW**V1Bg96TD%|9=3&Ks~?Y_Mpyk0>0`W z^nSVGQku@gp+@r1vZ<%~=k9f5qfM7!7s#k2)fk!Tl~qjfTqotj^(6?OFJ41`ZsxO$ zmtTiZ@&5qsk5Ty|_Ea36yRp@tt6YbLn~>wKIDTFbHlE&=-yoa1JnXsd0XCDAzRjV} zLfd~?OT(V^8LNxx*!i$Oh|j0F2ybzoyP77OT%MX+LZ8!A6uMr*EwizH8;9Axo|{Kz zwV?hg?$cY)O7m{^kN(&ma^nv0G?yc-n#W^jA6B~Cn$e$>G^(uwr|jE`>*^h^rg&{c z*@7V+`{mkcude9SZIpcjpOKbP}fRp+T6 zc?v2jRO^a_-Q;vr+>IV=-r<#=>#@0Q&At|}*V5v}?&Wp)U6T=RD^cq9*<(RP_coe{ zqLy}Ti+9OUm`lu!Bzo6yyt6}2HJK^|_No(|FOJfYw!~)dKlVmxI!A*^btS+gh;> zUx?m1!ITI7ESFyRkE|0~T*uZtU9|pjejAMpc5kvPO1E!XuX4${u{~AMdZy7|FCt7J z5Y?KQK;RHb`z@b$kNRg`E?>v}@O=gm_{l)Pa9Ft~!xqdzQVJa7xm}G#<2UO~P5qc` zCYJ3NQR}Sf^(3U5{-ZUI%IAKKjg}t@4t*!cYwq!%fglcDW|r4|deHbY>K*z|Sx5OT zv3Xx-ji0fl!fsH{OPLhk`fHJnw(=F)N02X|dz^9Vtir%dT?!M*Clex%p4mqhn>UbLgG@eXc9j{{XJm=G!jU%%QL( zI3u<9=WZIwp-Qcx$!hIS>cmNKzfE!Lsm8qgmynk$tA1B^XJcnqR@Pl#r+2Nb%l$D; zC~;gxQTeS}>vB81maI7Fa~o*7ds~PPAPl#grd@V6}eheN_b?s|T0;1ILThe*M@d>+|y2f>SMzp`U0((VdyhSzZ$JJ)f z{+^z@zO|!m7vwdawNq@G*0iATwJGkp4iA#o(%RqT^v4-$JG`BiXu_4Nx>=1E1iNcl zyGy=tpGQk(E1|N}r7DX)KuPu+Y3ZfJS5jQMZ0_-q`WBRz3G|TGzb*9smcqw~l$(Iz zCWqCXR31ZAWcLz{%WgiZ>hoHa0D?ZWhfPk~IF3e<<98X2E&bOvb?!})U?I#C6)FPX|1>!bS2VqKqkK(v-3CYniF!VTG^$s9OJ8`&&mxt?tlDb zV*Ijz1}+eCn%e!D9_In)!E5j?+Z=wZwyCGA0E~`%ErMsg;nV&a=M&YjvcIqY0AKul 
z$;SFVy#D|$+LEEzs&detp9Q0;txhLM{{WJ=2BJJC(^|T3U!^$a&UzfAuP{Pq)kw@a zA8*I#cx$$cdG)s>yeFx+n_Q*sZbO!3qkvtw?moL2Po}j>rw_fgt7vNqbKmpvY7O|l zJMXQ?@lC4a{QHBbTJ)l>#O~kQ>XLrc01qFzuLP}4sk6K9CdT-rr)!YD={nlmGuKBk ztu|Kk`}+xKYg=l(p04K&NGi5UePfc|9Gr-`UcPn_AO^2HdCj`qbUqEO`kv zkv2c2Z^f=?ZE4i))J_NLl3P!l-P+munw}aE+KooG?!*t&Jf79jdS=$A!0qd}jExs! z{3OzxcQdG(tZLY7;!W_Ny9mLb6$HZbe{H5+Zi8LKj->NIG}n2&vU6oC*L#d-J~vn- zwjl?XB?LzwulI%zUcD3h;`|W9(q)Sx1;R=(tqwa=rfU=duvek`&PqW~;RA{K^f=jE z><0e;{ZJMO}EywR|qQ{k^%bNLaFaCZBc0T2G zLT{v&o5>AF+si{{XXJ>Gc|upuQQY$HKYGXmL(x`eZHyfw&G}IyZ(EcH!756kbIvXwhXm5F6qJYiWzR;crGY4 zw!GAyBkJud^D#$Fu7qoF0dUr&hPc_p5^d%HhP!&!BC{iZivce8RggmrfIh+W-^ z(|1PA9S%y%wT-1QJ`zG>Y(h3d<>KwDtC@JgH{HQ_>UP zB5C%o$aIapbBfe#EXJSj3|#Be#|Xv#>b>9@-^nMa{{VVFSXcx~4w2>OODDM0C67S1 zazD_15I-~ZIN13A0Neinj`!m4>%FHfsPLR>>u&ShBq^)FYVJdkzPMgzo@SpUn^VQv zL4#+ZcW2)ES*A787nGzj#M9-&Ue9NWAN1*lO;bv9-81b@z8t^`)z_B~ZUmIR5|;*Hy{dsA%9c)cd1n z*vg%jK}0$4f|mB%5`*o7$GE(_cIf>?05#|4+OXL3(_gsS=5#bQqgX)jW9lwRmRq|h z&&lnu4h|AYj3YR;JTF?!(@wS3*yVW+OLa9HbC7+0Yf^8?#t?;ex$R})r@y_*2f0p9 zPhbtpT^n^*{ERi$bA3s*2CRlEDx3fkiIMZ{U zq~DNBAkHt5O?htes$k37G6V`dh}mg8xpHxK#ZPjMEjq<|VzZKOk#JGNVvOD(>rorZ zP$v{48rSFj;ftMTj(F#WQF?iGo+G}GlFy)N)_Dy1z#J^>p=P43_K(-t9_W=!-969n zkM2j~4jgQJf9>?YIc-MJq|9Pye>8P0F=dNrN)Z?f_c?dL>Y zEgla~UR;kGOIR9R=dCf|wTvY>Hq234MR5TX7rwOMx)857%ExN_lh5qn6>x{4v|cq6 zb{ZUZv>{FXI8`fc#+39YxyUi9>-{^ub-y2~gHKvo+WE=q{l=dg zvl+wc8borM`w4m0jEZZ>9Ff}cW&ufn$J^p&d1vzDWkqCZsm@dZG*DMrh;5oxpJU4x zvxzXtbnpYElOgNkXiFp3o^)Fk#`AC!2VOoTGV2`G_|`r4qqf>!^}W&uz1l6Ea3OF1 z0D@Q-6)+|g&9v(y(F3AP$N@{W5@@pv=XH{!HJBckCs#~6mB*zU7Robz;q^ZWe`ue9 zhYmJASK;{GgF9>THS1TEys4x09IyE9uV-P*;m9C-(Slodo#V$JxO-`9H(RM{RB}7i zzK79b)Ev0Hq^R|+Zr&PfS+>#6<#ir-*Xbmv^OGc5_4MDAss8}m+P|E4=*ycdbK7a( zi%JPX>~ta0ydejVC#)r))S^F`_L&>E_gbZNuH59_+e!T0iX#I$vbOcrs^E-R zR3Ho`tev%w)Ax;_t5QNjsM__7nIDDve`9!R{SKo`XM)xc)r<0vZ5FQ5@x09Oz)78g zw<|NFs3P&dA*rOA>YgQ(4Q^Xfx3Bc)?B4zn zVb;Mp1uHF90$pdexWme1A1@N23LIttf4>}6z%XIW)&BrbR0BRbqHVa4I9T;Ld6}fq 
zrS!(_bf>BtT*coH$@o&XD&^$avd;1RCHs%{!-pFmEB(0p6t06ei{8SMr$_EMk>y4yILvC|d-V93& zc9o z0sc1eibssTm zfYGNF!EEcpXX*_uAKK#h)TFeb9V5(a{fcpw;@rC{PnC$Q@5b9s>p99B+Zqp#zVMs# z`|VS`$R#1jQ{5bhp0s%>h2PecV(@0AW3rfx-b;3y!*JN|50!p|shzRCOJkMss3o$8 zqNgTLgUMLXlwzJ)GyGx!i6L;+i)B!YMw4ZxjF?+SsDIt-(jT=L2cgZO{+6i*coj}E z6(PC?2q#WMe8*pQFqzwR*#7MLJ4%YR%}t6!Ut;aC_^0k4tT=J8^8Wz6M+;p$_$u8U zUdiBGy8Cj4HMi1L*7}MlH@Uq^YEH1&kmHVrHhb={*kQA^zT9OmUjqB=cXckh`jVHJ zoZUvGiTg=1jVUfyMs`2pW2$%U-nhUnb7Y&}qFlw-(lGimbtfPF8hR{6b%vhZR!9vzAFc1)3+t0|e zbXg%j)1U!vjlOVnF)f;r*?4`OK-J@uX0`fHkVDPzqfV&`J0fn$kcOZ8LkFqI{?K3^ zw;j(+{{Rp>E8~h4+r#0cxYY%?z7@CglGROHtdLH|rDT_r{qZHa9WlfH3;Tu-Lx&$O zKbEb1x8Y^RdF*jE7u=5MI{yIU1ik%QWVq$qYjNAMw$7Dx4+UK=}&pJ#I$t1Dg8GartxMv{{a60qt^cbpT}9b zURcc_$HRY#V0itZvo_|UQ+tx^?V@cqh3?NGZ}Iy2(p^a$`*>^9-q&o}WMQYE`#0t| ziEAYnU3T|5O5?)!l>tJsB*361{E_431YFkItb*LrHb@!lkUkicDVtb7YC01i6hqlS z6rQnmH9#o`PnqLTbyozTTy#EqTwz=mG6QMY@!PeI?(ft(l)mlK_u1S@;%{2Sqq={z zFbo4a$Nk6qCJ#o(9j=})r&v~HS$bmVTfAo!%J1>1s5eozHZJ=Ns9za6{8u&4sl$z% z9|jcguhScGjf__B$#b?%y~vq5>B>pQOGNHo`kGMW+l_uVQ&U0p_qFM#&&az^>JB6< zxh=T3*Q0Av-%veke|+WoIKQ53A(ZN^oez_ zsW1@an4P6eklPx+NLSkC^XfT5yk%@c#uTagYgSrZFVy_a-kgbNA9Ju1T6}GxD0rmx zwkq4+%F|I2l=kQ5i+T&I%OLvm!^llS*6z)2fOxo;^vhW!5y$YZihFw+^6vXp1**>4 z2q?b961L=5!S1=msid~J6D<3Er8OXmzkxc!diyMx4ljLc)G?>S_%j=K*#oHIW8%(E zd=d%oLa8uq5{)b+>vBLKiK3=5VYGXC@ZdK@{c z@j?2;tq{927YrGmjf~WD;m`_O0Pt-%!lrkIypj}jJ$5}tYhd&^alczRCn)+&QX|RR zPV?N_c%9pU#_}9=Uz6A2O_yV1S7U3C)Y;P1P2F7{LtpNBzEgi^kJjaWr5oBi463|F z#?@bbarQQv7lz>@Lr{}heB;0I)rsS{y?#q=H~H-ov8o)9U*eAzBNlqC$c~OSF?25`ikQ5WsZZDQnT_B#( ziC*#bQJ6r#yRk&<~$0CG3*t!cOZh&uMxET6j?f_I5wn9)MsN25b+B_2nb+ z=y2pdF21Ms{{X50m>6ejay70O50Pa7>hFNFGyFHG&0~WJ!-pF-GHg}2&0B5HW4Sap%wF|5LqostzThPQ)Gjo z1OBw%`0$Z8*xFxqA9oeyp|5B^Gop2N^6-?LwDX80@hWq7z;A7G+83R-^-MVp{aWj7 zZEGy~T0z&5{{Zaq1Isrh9a-ODnvgtl@$1Lh@~*H^xibrj%CIJhZGJ=9)~=ljMOX#^ z37LS-<^GAjt@Zw_cx-ZdZ2I(nto*p>Y@fH|x%StGEzhRTq`3{9YwmB_`#QH~A(ZF+ zJk%!3tpte^6q8TK%~o>NO^v5G&I!~v9 
z4(`7mYO}7@Vrlm)-nB_P?N159u7AmGv`M0~&=w;Rk6vK7HDZAGeDfVd1B2w0%Krcy zIs4BK*g;k&$v8cd~&m_-kU}^f=kEl6FVbJKIUAq?-72-01A;-(G`#5SzUj z;?f%XF%#C-otDusdeza}TnRv>6G9VhT`P1u6N03vAJ z(HYYVk(3$MNc>Fv<%l8AYOFw$cSq2Cf)qU!B!OtqBEY-W(U7vmQD%mnXK;Sb6SoYr$<1 z{sV76z-{--^1G7U-_gGOc;F0pcEsjW?y-{Ae2@!E=swWB6A z*4Ss1BqW!JFb0#3g_ze>BS+;n#9sHgbzu{Mn28bQU9fnzQ}$RIB!DYF67TSF$5;@N ztr`p63CBI)Zj}hjf-Tt2l;Izk7%*VK1_9_Y-_T~M`1NeB#>)EqoS|jHza(equD}Y_ zcAEKg@78PC7dFs~ej3LY)ZxvK(w>hG%x>%EjsBwDUR##jORo#Nu-tDGxv4qwT;!^E zZ+UC&>_N4!Ce}bPuTf)3Rnnuqoq$ZLz8fx&L;9Ic#>yNQi3Qg@*FDMS{{WKc6yM}i z%|4sgHM?=!lW()w&tkdY-kl6SIq{qfDNVf{!7SZ-MDQV!)#aE}a#Us7uFUB=`BFlk zb9E(zX^zAN>$ghxMkS(*b5gr=o9=FeciGxXaW>x)`d5CU{B8LAztNkIoVU^&`Z~Nv zk=#P_L3cPE{Gf_$t<0<`yS*B^UM?1~6QwnnD{FX1Vn+kKsCL8UAigl5ni9sLq#Ji~ z+K#(y?Ww&q%Uj$^PQF^aR>k8DB?n(mY|i1P`T>sS;&75)M6@7I3_tT0#OvD=Qlc^= zpbGrur$)up*?jvAT{-PjiE~@&mbe`^zR5DrCs+cJ~g@KJ-x(dp>5TQCPyIu08v3-{{SVHf8@ITAF1Jo>PUH?sbF*e03e5d=i!=M z#CF}Zh@TO?$HQAz;V)(Kv~{M~_{^T_bYAcLlFAJ*WgWbUeAYqZtP{xC#GTYk;+~B9 z)3UEGdOsl!LNuQ%#aDNAzLn9`T~DO>t)a8@Hz%zj^iJVOeK)%ezEg(@Qu*fS)bBr! zb!PHj#%t{Ome+>Fsdy8ut+jo5XzLwk;6(-$x`3x8^dJGsb{{T(<)%_1v&A;x4(SKNz6s>_FTBKq9 zhdAvsohBe$r|$=@x%pp1hZ`(DGlktKba|zwy0Wm?q*7bENOs&a4Si)yt;*Ym;P1Y> z{6XXPcVXH|7v6GGg{@IJ*&BOH*w~NtO+!%H3*jwOH*H1oO;PNENclJv8hy9%fHk)r z4Lx1nxWqp98-ko+$9^~uzt_*d*U!J#!OB&0Lw>fl{Lj9LQuEW;OVht`FGW**i{OPk zW7@vp=+2}Uqa0E^Qqwh{M7E*nS^NvIIsX9a5k*ZcT_ldgqy9sRiax&G^3#1xqtI3M_DHqV4PoaLKE_d2rO?2} zu@P)voF`F0m+%AmU#wIN%Mb>R_|X>M-Y@sJBi;+>?}@FkX=)vPy}fL&H@NMvhSZ^bP;DG|$gZa+wX02yirMOXAKpNH=a?6D9^#ey zgTc09#{;)-Vn>kjf`ZimOAR{{{{WnOML`j~(N~?~xLs;blHm2muN%ec+lmdE96plh z^4vTnt+U2y{{V;NIE^&on~&6lcGM01jy6e?x$Z`O^Y%z2IkS~g;`?2@^gP7SmhcQ$ zqcM>zjmJ(ecmDvD_Fu8}C$o?BCe6|xEw9dP>dm=*&wbv$Z?P$5Z*-QX)~>v#$#Eps znx*%+{rA3|DyjZQM^#Te<2CupgTwP&ldmbs^8%%d3j6B1!lI9>ymM9?5fE`aFf^c( zAbom)9yd_&)cb@NC9_rhP}j>5WUsyf*!CBu#0R>eG>P{TzrvM1ML*M_AI5x5wncbN zm055bC85FEL3ll0o@=WA0Dnd-32>wblY<%+eSO&_Qjs3W~QB|}z7gNQGGHaM? 
zXdzLn0(m`p`^mAlc4Q-AOO>(*O7ZTQcu--*&RqP*jj2i!7&?G^&1Zi$_bNHBrRl_R z)`jDUK1b;R0lWVIJ*0`V$9?|K7p=fCekcxyCgkv7d!J8n-}@X5HOq1Qu`{%poXV2{9tY%JA)Ij8}m?ih~g`frJA2o*KQ&o-Q6Q%J{C=!!$kWKLok3~ste(kxMe=uP z=Ao0fFJ+F!meWFO-6HNzwATizmU%=e(bK&jg_50^m4jrcR9;>Z)A&vB0+tv)S={(> za;xhRS*^Eb1$VWHn(`D&X;&9keI(~LGLg?TTq&gZHDlkhI@kB)e-J$e4?sNt^kJ0# z`lko0X6N*ipC0qcv&`XO2w1iqL5Zhc_9NmmtP^+1smcO92duwo`t&$*zo!1&9-k+% zkM!r%R94{m{_Vzdy&f~2Q7$)?eBa{^mM@pN(^F&k%?k#~+?;=Kvve_6If2^glkIHH z&cyY+r>g;1f)SrjZt>cibmqAJN}S%u5xc651IGeEkZ*N*``Kx1dz$-E>z1A*hluwXlH28H#BW`ngOrB6 zAtUa24V`6mcx-FJT*ZU$BQx<@DYK@!Tz5l^(;2l*HP1i5dJG<;2cY%0{F=YCK7$Tz z{{TrTDc7QW8uU@3!fX0UILlgRD4|L_A^ZCEHf1|IR;2#`34Z7L^lUj_)BgZ&7m?YD zuhhGvC(QC?-;>x@r`24E1L}@%C(rXz9>tNR{CBq8m5hEZr|v50y>BfkHg0OkH#r}e z=&&-@J#Fo57LKm4Ctq0)9j~ozhu73tPiF+`|ZLTXrQ|aBg zCcX4HK`tkj(>i;+uN|+U!EDQ4ZoICa`F>w_Ut@or*C&uk*k-B!0NcK>qew{?*edtd z*J|v?+q3+&B<0e&XluS)^2ORv=%^Vl>ZsUJad#0MA!VXECzp$n#`jZH!H8 z75jf8;CpmblSZ@hD#rR;hYqLw{{S3<-&oYl{a?!!ztoP#mypRd^o(r(f*@9{)KI^M&a&k)Py@cabm_?o(}bA3ruO`}Jo0ore+x2cc7at}&qZUWGG@_YKrEq!4lr=}n6wFQ3r z;9GB_xZ3Gy@&5o4)KvcfA+xEWU4Loyiu9aUA5U*&ZXnOPnN2oU;^pJzuf+W}YfT@u zuH?Ap*;nG!dGTjF_^kWQHP64-&%f8h$pt5uxXxczt+F-Gj=ZToo?BOK_8VDYEuKmV z7;@0*?Mm$?!^QTw8|*CmvWePfL+JV0E^jVc<$Y8lLM{I9s*QS03=$!dA~;q#7F zUBFt4LDLN5ejw;YAE_WcnGxs3A`Qw(dK#dK=_@Hot0IoMBJhLX&A9#AoJ82BTg}j3 zlTNc=Vlr3d^-=u?p!@*!7(H+Q04Be~E(7RuXZ}5KJ+Qgsons*CJ@^eWNaG(XtjH-w zU;#dZ`nnRIg#Q3?{?^e0k&RTQjBB+$4kMG)n{oWhX2;T7Wbzsb zr_A$u;WYS?zvK5eo=>g0Rov3GS8to$dvZ@xIPu+kX>$#;$gmZqHX14ptedKu%LC*s z){mA;kSMy`;Y8%LrEP`d>Otee;n9vA9bSZVJ2B$GGU5<{!TE0PqwGguc;ShESqdTa2z)F@?T$gEjf;CjUIo)Tj@*j6|X1s_a8R5)SITd zJnt8D<@Gc_@?Kwr{{R^4v*PbnV(sLqb0wAClozUvWkmM%(>o}Zf}r-9g(FEa@XNAS z>7F|i=}i`Sww9J}{{WHNP2M}nw)E+$c$bpXkos?0tE=?yF|D%v-_)RU`+S!E@X+MD z(AMN_PCne^0@s{I=eS+H*Keb__Qd)xn>c=w=3q|~$S%DHRO>k8aJzse=E*D<^d_yV zD*J4g1p%|-XxfN%Rw4_ZQ(4w|r*SL8e#IG-w%>ufxJ0e#u*t>Umj(w=w6{-&gzgdc7AC+=UbLx&$N{l1)9 zn;@w!Cs=qw_P%8m(`ob+2N#SzOGrL$#4SGr$$q^KHdFO~?exs9 
z*3#M3ge&zHqBS|ozQrwTWoK^*F4JS_>lWS7*tYjKu!mENsZ!fDAkofOqn-f4pj*yTA*zE0z6%6Qq|ZFx;g9!j$){*B%wgxiJs z$8(Nk`qmasXJcBtihW14Bc;uKuJi0KspwONrz{Q-<2Z!qd4n>P9DL zUaiQ-FUWVAw|dnH?f12#pLCW$&Rb&gjKsXxf8gj;{k!=qN06~!y6kyN0wKe-37uW( zoYUm&1glCVbi*C4J8Uc4>X|3`{$MugEw-7*Y}QodHeY99{{U(*{=703q>RKL4~Wj^ zo)pUm@0iyQs-q9ia(sH}-|6~$nqqTep+;B9&-)`WG`u-hbCR7k1d zLN%dZe_i#R!D~-s*ExRDQ#anEAo;gXT=8nXWNR#&GAv5Q4A0bDU zYHmRkZSnZq2>E_St(M#`o0KiCzpBX1xg%YzP9JuYb6u9ijUNPJtNy^j=sgFe{{V6w zGI}3M{{US{lL}0UoCgaYqNBrb_V>K~tvO7*mssSXq;P6zWVp^Jt-T&3YW@k5^*C|z z{{X%*U*3l$r#0K=b~YPRkBVoGDd;lNdP#Imz5rIJZF`IOkD0;|zF2jl}nP2?o~Krna;v#lN+^Al;?YT{$1C z>lbgV@7+GGvx@cR2+b-HZ@t$F`><@GMsExNVIw*@@G+>P2Fd9xxw!uT`no8swa%b= z>D<1nNXK|HV0X_x&H;n+7%+N|OMd13)Aj2A0If<9C!lD1~RQZ>O|mi^J;-PC5X~hAe*0O4QT;0M$RgRQ<>FJq{dy)$>fyR$DCXsizr^z{_s0 zlha}F=bTFi1Qo6>q3Ph>7T1a^&}Zz<6=TZ9(_@Fxt+ju|zjxDE=FvkSFe@Agdou?o zC=4pNCQf+ufpf1-KZL952L~0Gs0N!Z=Z1aF`+HuQ-YQVw4S|>slG2CIUM(J<19)b^ z@(KwojJd9ji%cHT%j!E9u$B$jben$%dcl)^y~}U{yvlX{Qbz{til)doyL4P#-I5)) zpFhMUhSwcUO7bj%JZBrT$!}@@0P&ab{{a4D4ElrU{{Zni+cu(S-|Vh^GxXmp%BB8< z-cetqzN1Xt`hSt~KG;VEsJUcsB)j5#|0DKnLOVn70c|(&1NTHW~h4-Pt}X|YOFva7znj=Ewui165DYZbw_ za0bne2Y%CgZE?2u+DGtQmJB#>Z@tdX>JEE3vmi^C0Z}DI>;6p2vb`OV^GLfcQE5aj}%( z2Q@#XK=&w#(x+HU4D;(5nBnrp8SXX%(Lc3t3f+hsck!On7Is zEcs~Ch}@}MU#W@@{H|Hc@9A%1LBX*Md~$7?yjHFCzr$(g_-;Qx!D%p0r&WjkHEgE= zbym@=@U7H@+c-6OGn;&GB=cS-)32BCl(d&0TSBWH9zJ>PYOXs=@E?%DgS;3$Es6c7 z{e55culghXJ~kz_1-nTgXA`Eo6JCkIidUe@GtcBX!tdeuFYaG~$NgV4C1DuKdQv_>?&xt0CvB&&ji2nc<$u zpG;9suhWEIbLo`9%yFv8%xHB!SBp6O&jzFX&(lP2)hV?D-Y_WqXE9{cUVoHQpH%WI zhw6Q&%JR2%JMY|g$CP=rP&ufuJchfM1@Mie3%?Z4Eohws%gypj?*HU-Y@LeG+X8k~=5!(EwvpN30>kjdy! 
z0i;0tKn%4e$L)1g)wN_s_O3T05Eq-TbmlfazH5JG>&0%~LYvFc`-gwCs{^61_&EAz zqRx4D(Kxc>ac8O$at`J*#*(f&uv`BC$~p#wws9#+YKc>`Hp4HJhso1GM;wf&MFK8~ zC+toy9_Kx`L;nD_aIWShchehxkz9{6$&5Vi;Sb7f4_@$oK>k$4{mb?0<265HqBM1$ zsQhi@URy1!is}dOGIdV}d!65xg zRAEUbIhX~;f7f7oeBz$-wDH!;kM%k@I_-eP3*YI89pNq5Q{&Th;^P$?p%xgZI#!b5 zu98ms(*=2rm2q5d2yKN5+O<78WZkN49~FRimWp;w&B5D;|!OsI0RhxWhF+d zF=B7yU27D$+87Ssh0kuRxUPio42@(@99>y3*4Nbc8KCw4V!e@t-^f3c-Pv6U@42Hr ze1T<9lsR3gD;>XRdC9TTX20!>2i5U^az7ren!gTa1oeiug!W`|)KL1163DZ)MvaiC zAGh0R?XgWg^aG9B!=bj}IG{4>7XGV}^f++iutIFJeF|V4b}_7NSt?YU@}-VKta-NTS3ERMbu(pA%TIti0JY>4H*MKs>w( z$Lw@jEK(ZuB;df=#%!5w5U*1=3+6*5yc=WgUp$w)I=)lMahhWShrpj6(OhNtQ{kQg zjaSBU^XZ(ck3;^z^d7I*Yx|e$)w4!ruU>@KtQG4_PRi(fX3+`}yIfoY)Mn@Z0B_V} zaACvf*?iWUz2WyeaHu{~Jsm!93DCeQ_Si1vK}?DDt+|BvK6DT ztQoc_N1oOl9p``Xy8)Mq2NQ~b38N*&*G0sHGDB8)o5%^|<`4O0IJ~YmEmAAUS;KM+ z4EE?yZrCI4?7EGrg*=JvM$XiRl|Y7qE3U4f$pu)eyOJGR@+PxU(f5hzI<~TwN=jH? z#SQ*Nb`&845otu))JBAM$y9@y#42r9;BI)2@(qJ}nf^k!yUt4O z0h8JXH;;Rr`izh0XMTqcUm-T7c*}sBd|tlvwsozqT-MyayXtbfnAm40K-JW0u;G+}#;-{V_@pYhUgH_q=Y|2D=Xi+Cg;nuj_>75zS+s>j^A-11`RV+Nx* zo3FA9JQ3M+QDyc8ka5y%zM%zmyTheaqE*3~>OAKYG)MNPyK2Ff&4IR3tt!%kU*atU zz%4_463W~pTPv)HL^DR|Fp!GlUKE%{Acjm6LK3tEE*Yu4B7W*#*mNw3CC}2cCrWAH zx+qrK%5seCzwD1egVn!j`;Y6?=DDb@ryXlL@%lqIB{h}{6wA<;7g3(*Yz_t*t_xtZ zV(i#v!6yEl;Qhx3sm9HWjLXxDU?(Nlg#n&B1xh@+_acS1qD>ee)!$lDw(g{D_2GBM zi3~L>Li%x(<~#)Rs?EB(U*u+c&!?L^m5wAzYadv!|e)G6d> zxV0pyF6CIrZBmW8nwt@<9z=PD)!JprS_LPyE|#A1zKI~hod|Y5)}D9rcX{}$d!7sL zE-fTTs&>Rv1EMRLb)WTv^B#lM{uy##*VN||vSm}g_}q@pQ)bQve(itx1_jB$oI1S& zKClCdTQ)!EKBFOn)Z=Ey=@+-sy8#|crW~HD=I)ez!K4Nztd1tSbal~5ocmv8N9?Jp zvRf??LYkpxyo#Gc_dYjRDAdwihQ5#~trlPL*X(hQRB-BXc$i}G(HZtK8TQTCg#c)| z&$mqFH0zcDYEKm$G9^X{P)}!7A2&GL#X_z_V9E8{%-NQO=d`6f#Y+d8lerF>zDvo~pLr?m*1V#oy?38u zRPuHY4n~SoZ^D;OLt&HY*Lq8X(M;CkwDBUiWQ&s>Gq*)T;d15CPF;Iq(E%w=$+8+< z!?wbN7mG~oM9j3v*a8F0C|Wm{y+#&fC=CIlgNEUsC=Z>R&UDwQj-e6{t{Gca z1gq;R`H&xGqJpQ@vO@zubQM!fcDT;wMj&XHMeX+vP^4- zCa*2c2ckR-%3gp&k)R88b?>wu!nCOBX-gINs-x~1nCmVbSbK5!Z}brsyDqg16dzvy 
z0N5UY`XBBxKiAaf9FaQ(bcPnz#wmeER$p=T^?V&H0QahJHpUBA7Qhj`Fh?fYzTvk| zsL0^M=y9_D0Hj|h;jKRaFd;aSz=_p$mn#7IO?t?lv0Edu<0ZdBsbwS0GstJu{SH)q z^-^DK7%mdNL6;3WEkTcA;M|M*n|pH^{%>t8yDrsz<&__qiK1=lJMG)t+_u?*Cg%GT z;Q1;A`hQfEzyaM^2*@s1$jMxSp3|5$0Rb2dHe^8-ON_$o=ag)iWRWsjrBzz9a zrm}|i$zN%g5nPmdj7C%q(}U&Hc?>Ave-s9WAorM_NONeVsWMs{@ zAx7dl)^ym-*kClQZj@KH_{z{`dH0mD;*qyonaxB>nH=kw1O7*pfV;pPYcHWRlcN>dSV zMouYDhHl;4rY;Czq`!R+fX@B-gPuj#zQZ4$sn=BtIsmFbRlih$*G%DYsp4LsAGl8; zb?7?xMsyxba3VX>oo2x$E}kW_8_hhQE!wu@410N^-xl-+cc85+AnCM|Z+#tONpx~HQ8#iZZ~(_al^hQKYU<|t2)&Ex+7wdw{(1{^+(m;EC7 zKN;;JEeOfKxV|Sm6z7RnESPp+L;bqFVX0E~(@@VMqCW~GFuZe`VB3c^D!*td0?wZ_ z?c={6s1YrGp{KqPqsR%>w#FFk?=o;f2Cj~bq`wOy9P;SE@!}d3L~ng@y(_mQjZDIoh0M8A+xumjB$~3JPF|j8Mx<5T z0AJh$)fcYtFmpB43~art-LRBnRz@|Xrd-K@s+qNqOy{<_I6Ldl;0E#Ov%ZS^z=vQI zv0%rwlQuM%?*P8Kxzd7fs2fC22c=PspaKP-sEFk62&|7 zpaaN)&7R!Ob~-)bI2!|c3?J-|LFj+K{{X0;#fSdLTvf$PT<n7hnp`ZtSo<8 zchX&7@;qu_=3uzo{gfVeeS>fAIeS=>omGb)$tJ=&tFy2L_8xd@{c?hZ^cT*_=E9!f zA|DTHI09&@AX3%}$@(S9NK9O$mAs-L?JplG=Zkmkg5M>&8SK-?!Er$86pTtDsqrBZ z^V&Xs+(W4NB>{{i3oCo=RYDu!dtD;{9&_=oyZJIE8T^njCc_?qN%J{HJsCO2R9uv1 z)?_4<7$~cgp3%CJS*K7vt$HE}PYl%yH1OL!)8D$K{MWatvX>|g+ZpmbD5P5Wx=k=O zS%HEEbWlo732Y?imV9~^^&X?tdJG`f$NDjHf$m~|ya+*(Sk{Z?b07mktbk0%eQ%C-FEYzht zeQNhahhf#z7`o=GswGXC>90j~?bNjb9}*fK)6r&xpwI06R}qjiQP|KN@{%(5f!UF2 zLHCO%x(NP3gRg$;s!pW4xlo%Q%9Z)|0^(VLR~|;FacQogwzCqd!`s(6WGh~(Gs5L( zO_PX~2f&;O+Z|8_v{^$T7-q*RiR6i5fp!3CuwNe>WH_@{R$mqviP@~X#Mb~gaD4IE zv%F>p81_pXY$W>m5rMEjsKMwzFZ&*+`?vRx>+sVH_tPpcTnrY$PRo_?Vhed3RyY~6 zx6a=I&0VBL=Urcs^zT3J7(WKfeuwW?Xcn+QHQhr3^D|)URdvS;gUhnbgt!gS!Uh!$ zos}ZPH~f9VejtDKi*^H_PZq$r?cs?F^u)H(_O#t=39y!xlWNqNRdz4SvvsCGyrxU6 zcNxlgItEO6`Q${<(#Qhru_U@Pqbdj3E3)dSj4F&J(`wcBB~?JyahCVU7VsXxXPhKd ztF8oMo*oWg1y~Po2LV{CsSWa2P)$|59sH486_<%3Z|->XQD@TKRhG*(I|PqD-D(pL zWm$_L+3-iYDBgXu4QMu=#!kP6POL`Hw;A-XHGY@=Nj6WgCli!?3yc3!idx?(2x&Wi^oC z)Sd~Tz=?uun^TBZ&Qt#YxA;f)KLh%&>ogWV)r|aU@bai-olKcF)t0D-3iX9bs+R!K zu#uikjFLFhQoMo%e2g3XX;Fy41$89t%wujt4*W(Xfj{Id)6+`%ZgMHC{_|S3vMwcJ 
zu;d@g8s28&)_k3+KvKr$VG*bgvRtI#p_&II(@?t8Y`c(M*t2BX7LypFl7!X)5>s07n~YE0na{8o0vvxyyC;e}r&)A48S>W~70})FE0xU7jP_ zL33G+nk93R_RJ!oo^!IT-{zlixPey&zrhBI^(i%!{AKV6a`vqT7Yrtakp04~Rn8lR zmxDUloV$S98>RqzVd+DHlZe-)nYseL zbp?$&jhJAqO0zLUvNLzox~g2yFPILxh-bjKlZHYr?FGC)=}_$7(OLyrVs_dCJr`A_*F z`ur6C0PXfOvx67U063d2gYKz?&Ut!;#G7@@CtIGQBl{nL%Ko!N00&(>^0Qp;`Q+-z z(5kR_k&V|~j+AGQ$zJpMb;+@012}ukas0l8S7>@KqMq@acXXV6*0o2t1CF!36XYH zU56;mtQNNFfGen~G5`w9H~}4T1Nr+Xt9L#V)K^7jEi{Lpkz9DIsh|33(@d}<&cG!Q zyl*bW2oHI(mQ~8vw3Py?Yz%PDy?EkKwpH7<^3N6_KWi@B^a!GOh8f!6SH_WG(q9}U zMgX`#!jrmMhqUUmHz;iKZ=k2tXwVe z-h5(&z!N-IK0Oo11yytIJI-mmw|@zK{{RD({T$;ag;dOAdrg&!_g!Vj1RT|NmLsDF zOe^Dx7opZo2VTtk%uZb?B zw^G0$OezKm{NqQaARPG5;pD+Xc60cU)9HVPA3^@u!RYy4`D6O@9*h3~+ws7-69SA2 ziobz5-glknEI;E_XSUQVHVVKe4uj?IV$ zEcHnPRT4DB)YZD~a5auzFtXo^Gf|82htYlloYCZVwAw#WLd)G5cFp~ra=veH`5B7O z_z}|#s=XSq{uP-y-)agHrvOg}O1Tx1Gt?DuJQ<``@qXo9m5m(RCipMdAUl0cK^ z&U-+GMd+%wdq<%3E1nDZkH{az@|}6d@f{7IMtNs=q0TI*absK>yC)w?>xsrtgT(=j zp{$v7g5l0}df4^rC4?-snboQWaK03t^y4@Gjxi zaa{p(JciGUCn0ro`i8L^TOWU7-8R=B8j9;)1mVHx52mKw;X%x6Y(f%O&0(zbdZjJ? 
zlEQAjVx(2y-DbMwMyNq4cBu}`)+YWrDI$P83PV}j?KMiIrVnhv0{~UK@Bhpx#ad6E=k{1k8sy7uje-^3Bnz{qiv;5vnB zx*9x4WCdNBLX~tN4?y1o@gMCzSFbq!F;#8UNaD&^?J&ter0q>{H|BHiAO&|F@iywY zTb3tgg-P$zne5*WtgwCuDf-SVpkD>*l2%fM8f9?IHd9#U#90{}jF?LTUJb3Zq%xzH zL5_pK1Z+iOs}*A-;L~MB_s3*TnQbe8k?MOqJPuo4vVTBxyhD=y@x3CNoUXp>J5_0D z4>K9448^RaDJ6i%qs)01gyAv~95SSqX6#AVUmt2D`N?pyQA;YBU$DZ`r5f(#E7@ih zyl4mdi-zi3QgKUDtF{eB;-aYfODv*B@T0UX^QNuBbp+hd2 z4M>}mMP!3M zCki?OI}6({;noBA56S-kRQ}Sz_&-A9@a$r~c^vXMJpuLRKjhB621^Iv*-UTW5>N$D z7sm|qWX;~UyV1cvd!GXn;#w)LW)y5iM1FwRmumuzhQ5me#_AJF*&pKwt zXMAVbBoxLB^!A-W2m;1L_)IE~3^b4=ODKp;yf9P9mu~tkj-)d{klOS`@hTZWJ-Xw0 zHg<3^1IRuQ?!Io zmIgu7+v!h@8||6d5s?m~GQA?8_+X)S4#95qDFs%GqvM2edZNyApN(l>6=|_3okMPt zde^#9Nf1eyO%SL;g-a;%wjb3YA9R%RM{f1fF5s%WF%R$IG-CE{jUpnozhtnbXK69O zgy2VF@i-WaM^lBovD}b57+(e#@ht9DDKJeNJ>#D6=uN_ZEVJ~UN~5w$GnD@Tea=7b zU$4U7+;`WfjuDCu1@vD-pBdycn*QhTpX`1I9Ap0gZ>L&7*Fj2SVaHI)|JEQkXDBCE31SrI>RrxLzh*N;qEr>1nmV-$ zb_tkpOb3b8@mfIwc%+?HMvyZY(6BSJ2JlyE_DteJ&Trh2%ca>J9!_I(?RoVjyEvzY zL@f!B@m|!JtG8h9dSu1@!x_F;l={YdS2bxGt9OLxnJZ9q740yz#Q)f