diff --git a/.github/workflows/PR-4.x-U20.yaml b/.github/workflows/PR-4.x-U20.yaml new file mode 100644 index 0000000000..978a3a4301 --- /dev/null +++ b/.github/workflows/PR-4.x-U20.yaml @@ -0,0 +1,163 @@ +name: PR:4.x U20 + +on: + pull_request: + branches: + - 4.x + +env: + EXTRA_CMAKE_OPTIONS: '-DBUILD_DOCS=ON -DPYTHON_DEFAULT_EXECUTABLE=/usr/bin/python3 -DBUILD_EXAMPLES=ON -DOPENCV_ENABLE_NONFREE=ON -DENABLE_CCACHE=OFF' + OPENCV_TEST_DATA_PATH: '/opencv_extra/testdata' + OPENCV_DOCKER_WORKDIR: '/__w/opencv/opencv' + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + ANT_HOME: '/usr/share/ant' + PYTHONPATH: /opencv-build/python_loader:$PYTHONPATH + +jobs: + BuildAndTest: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . -mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with ${{ env.SOURCE_BRANCH_NAME }} branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_extra + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_extra.git /opencv_extra + - name: Configure OpenCV + run: | + cd /opencv-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV + run: | + cd /opencv-build + ninja + - name: Accuracy:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_calib3d + - name: Accuracy:core + run: cd /opencv-build && xvfb-run -a bin/opencv_test_core + - name: Accuracy:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_test_dnn + - name: Accuracy:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_test_features2d + - name: Accuracy:flann + run: cd /opencv-build && xvfb-run -a bin/opencv_test_flann + - name: Accuracy:gapi + run: cd /opencv-build && xvfb-run -a bin/opencv_test_gapi + - name: Accuracy:highgui + run: cd /opencv-build && xvfb-run -a bin/opencv_test_highgui + - name: Accuracy:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgcodecs + - name: Accuracy:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgproc + - name: Accuracy:ml + run: cd /opencv-build && xvfb-run -a bin/opencv_test_ml + - name: Accuracy:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_test_objdetect + - name: Accuracy:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_test_photo + - name: Accuracy:stitching + run: cd /opencv-build && xvfb-run -a bin/opencv_test_stitching + - name: Accuracy:video + run: cd /opencv-build && xvfb-run -a bin/opencv_test_video + - name: Accuracy:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_test_videoio + - name: Performance:calib3d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_calib3d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:core + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_core --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:dnn + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_dnn --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:features2d + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_features2d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:gapi + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_gapi --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgcodecs + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgcodecs --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:imgproc + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgproc --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:objdetect + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_objdetect --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:photo + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_photo --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:stitching + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_stitching --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:video + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_video --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Performance:videoio + run: cd /opencv-build && xvfb-run -a bin/opencv_perf_videoio --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1 + - name: Python3 + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/python/test + python3 ./test.py --repo ../../../ -v + - name: Java + run: cd /opencv-build && xvfb-run -a python3 ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/ts/misc/run.py . -a -t java + - name: Save Unit Test Results + uses: actions/upload-artifact@v3 + if: always() + with: + name: junit-html + path: /opencv-build/java_test/testResults/junit-noframes.html + - name: Pylint + run: cd /opencv-build && cmake --build . --config release --target check_pylint -- -j4 + + BuildContrib: + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash + container: + image: quay.io/asenyaev/opencv-ubuntu:20.04 + steps: + - name: PR info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: Clean + run: find . -mindepth 1 -delete + - name: Fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + - name: Merge opencv with a test branch + run: | + cd ${{ env.OPENCV_DOCKER_WORKDIR }} + git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }} + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" + - name: Clone opencv_contrib + run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_contrib.git /opencv_contrib + - name: Configure OpenCV Contrib + run: | + cd /opencv-contrib-build + cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} -DOPENCV_EXTRA_MODULES_PATH=/opencv_contrib/modules ${{ env.OPENCV_DOCKER_WORKDIR }} + - name: Build OpenCV Contrib + run: | + cd /opencv-contrib-build + ninja diff --git a/.github/workflows/timvx_backend_tests.yml b/.github/workflows/timvx_backend_tests.yml new file mode 100644 index 0000000000..c45a86f8b2 --- /dev/null +++ b/.github/workflows/timvx_backend_tests.yml @@ -0,0 +1,104 @@ +name: TIM-VX Backend + +on: + pull_request: + branches: [ 4.x ] + types: [ labeled, opened, synchronize, reopened ] + + +jobs: + x86-simulator-build-test: + runs-on: ubuntu-20.04 + # Docker image from https://hub.docker.com/r/yuentau/ocv_ubuntu + container: docker.io/yuentau/ocv_ubuntu:20.04 + env: + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + steps: + - name: info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: clean + shell: bash + run: find . -mindepth 1 -delete + - name: fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + path: opencv + - name: merge opencv with test branch + shell: bash + run: | + cd opencv + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + - name: configure + run: | + cmake -B build -DWITH_TIMVX=ON -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON -DBUILD_PERF_TESTS=ON -DBUILD_TESTS=ON -DBUILD_EXAMPLES=OFF -DBUILD_DOCS=OFF -DWITH_OPENCL=OFF opencv + - name: build + run: cmake --build build --target install -j $(nproc) + + khadas-vim3-tests: + if: contains(github.event.pull_request.labels.*.name, 'category:dnn_timvx') + concurrency: + group: khadas-vim3 + cancel-in-progress: false + runs-on: [self-hosted, Linux, ARM64, khadas-vim3] + env: + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + SOURCE_BRANCH_NAME: ${{ github.head_ref }} + TARGET_BRANCH_NAME: ${{ github.base_ref }} + steps: + - name: info + run: | + echo "PR Author: ${{ env.PR_AUTHOR }}" + echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}" + echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}" + - name: clean + shell: bash + run: find . -mindepth 1 -delete + - name: fetch opencv + uses: actions/checkout@v3 + with: + repository: opencv/opencv + ref: ${{ env.TARGET_BRANCH_NAME }} + fetch-depth: 0 + path: opencv + - name: merge opencv with test branch + shell: bash + run: | + cd opencv + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + - name: fetch opencv_extra + uses: actions/checkout@v3 + with: + repository: opencv/opencv_extra + path: opencv_extra + - name: merge opencv_extra with test branch + shell: bash + run: | + RET=$(git ls-remote --heads "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}") + if [[ ! -z "$RET" ]]; then + cd opencv_extra + git config user.email "opencv.ci" + git config user.name "opencv.ci" + git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories + else + echo "no merge since ${{ env.PR_AUTHOR }}/opencv_extra does not have branch ${{ env.SOURCE_BRANCH_NAME }}" + fi + - name: configure + run: | + cmake -B build -D CMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=./install -DWITH_TIMVX=ON -DWITH_OPENCL=OFF -DWITH_EIGEN=OFF opencv + - name: build + run: cmake --build build --target opencv_test_dnn -j 4 + - name: unit tests for int8 layers + run: | + OPENCV_TEST_DATA_PATH=./opencv_extra/testdata ./build/bin/opencv_test_dnn --gtest_filter="Test_Int8_layers.*/1" diff --git a/3rdparty/libtim-vx/tim-vx.cmake b/3rdparty/libtim-vx/tim-vx.cmake new file mode 100644 index 0000000000..7fb9f34352 --- /dev/null +++ b/3rdparty/libtim-vx/tim-vx.cmake @@ -0,0 +1,73 @@ +set(TIMVX_COMMIT_HASH "1d9c7ab941b3d8d9c4d28d80058402725731e3d6") +set(OCV_TIMVX_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtim-vx") +set(OCV_TIMVX_SOURCE_PATH "${OCV_TIMVX_DIR}/TIM-VX-${TIMVX_COMMIT_HASH}") + +# Download TIM-VX source code +if(EXISTS "${OCV_TIMVX_SOURCE_PATH}") + message(STATUS "TIM-VX: Use cache of TIM-VX source code at ${OCV_TIMVX_SOURCE_PATH}") + set(TIMVX_FOUND ON) +else() + set(OCV_TIMVX_FILENAME "${TIMVX_COMMIT_HASH}.zip") + set(OCV_TIMVX_URL "https://github.com/VeriSilicon/TIM-VX/archive/") + set(timvx_zip_md5sum 92619cc4498014ac7a09834d5e33ebd5) + + ocv_download(FILENAME ${OCV_TIMVX_FILENAME} + HASH ${timvx_zip_md5sum} + URL "${OCV_TIMVX_URL}" + DESTINATION_DIR "${OCV_TIMVX_DIR}" + ID "TIM-VX" + STATUS res + UNPACK RELATIVE_URL) + if(res) + set(TIMVX_FOUND ON) + message(STATUS "TIM-VX: Source code downloaded at ${OCV_TIMVX_SOURCE_PATH}.") + else() + set(TIMVX_FOUND OFF) + message(STATUS "TIM-VX: Failed to download source code from github. Turning off TIMVX_FOUND") + return() + endif() +endif() + +# set VIVANTE SDK especially for x86_64 which comes along with TIM-VX source code +if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) + set(VIVANTE_SDK_DIR "${OCV_TIMVX_SOURCE_PATH}/prebuilt-sdk/x86_64_linux") + message(STATUS "TIM-VX: Build from source using prebuilt x86_64 VIVANTE SDK.") +endif() + +# Verify if requested VIVANTE SDK libraries are all found +find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR}) +if(missing) + message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. Turning off TIMVX_VIV_FOUND") + set(TIMVX_VIV_FOUND OFF) +else() + message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.") + set(TIMVX_VIV_FOUND ON) +endif() + +if(TIMVX_VIV_FOUND) + # vars used by TIM-VX CMake scripts + set(EXTERNAL_VIV_SDK "${VIVANTE_SDK_DIR}" CACHE INTERNAL "" FORCE) + set(VIV_SDK_DRIVER_PREFIX "lib" CACHE INTERNAL "" FORCE) +endif() + +if(TIMVX_FOUND AND TIMVX_VIV_FOUND) + set(BUILD_TIMVX ON) +else() + return() +endif() + +if(BUILD_TIMVX) + set(HAVE_TIMVX 1) + + ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wmissing-prototypes -Wmissing-declarations -Wstrict-aliasing -Wunused-but-set-variable -Wmaybe-uninitialized -Wshadow -Wsuggest-override -Wswitch) + ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wunused-but-set-variable -Wshadow -Wsuggest-override -Wmissing-declarations -Wswitch) + + set(TIMVX_INC_DIR "${OCV_TIMVX_SOURCE_PATH}/include" CACHE INTERNAL "TIM-VX include directory") + if(EXISTS "${OCV_TIMVX_SOURCE_PATH}/CMakeLists.txt") + add_subdirectory("${OCV_TIMVX_SOURCE_PATH}" "${OCV_TIMVX_DIR}/build") + else() + message(WARNING "TIM-VX: Missing 'CMakeLists.txt' in the source code: ${OCV_TIMVX_SOURCE_PATH}") + endif() + ocv_install_target(tim-vx EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + set(TIMVX_LIB "tim-vx") +endif() diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index 4e4a6ba0a6..e67304c5ef 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -37,7 +37,7 @@ libtiff Tag Image File Format (TIFF) Software WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. ------------------------------------------------------------------------------------ zlib General purpose LZ77 compression library - Copyright (C) 1995-2012 Jean-loup Gailly and Mark Adler. + Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler. See zlib home page http://www.zlib.net for details and links to the source code ------------------------------------------------------------------------------------ diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 9758861a6b..709e293c28 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -83,6 +83,7 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wshorten-64-to-32 -Wattributes -Wstrict-prot -Wundef # _LFS64_LARGEFILE is not defined /wd4267 # MSVS 2015 (x64) + zlib 1.2.11 -Wimplicit-fallthrough + /wd4244 # MSVS + zlib 1.2.12: warning C4244: '=': conversion from 'ush' to 'uchf', possible loss of data ) set_target_properties(${ZLIB_LIBRARY} PROPERTIES diff --git a/3rdparty/zlib/ChangeLog b/3rdparty/zlib/ChangeLog index 30199a65a0..f0b0e61809 100644 --- a/3rdparty/zlib/ChangeLog +++ b/3rdparty/zlib/ChangeLog @@ -1,6 +1,69 @@ ChangeLog file for zlib +Changes in 1.2.12 (27 Mar 2022) +- Cygwin does not have _wopen(), so do not create gzopen_w() there +- Permit a deflateParams() parameter change as soon as possible +- Limit hash table inserts after switch from stored deflate +- Fix bug when window full in deflate_stored() +- Fix CLEAR_HASH macro to be usable as a single statement +- Avoid a conversion error in gzseek when off_t type too small +- Have Makefile return non-zero error code on test failure +- Avoid some conversion warnings in gzread.c and gzwrite.c +- Update use of errno for newer Windows CE versions +- Small speedup to inflate [psumbera] +- Return an error if the gzputs string length can't fit in an int +- Add address checking in clang to -w option of configure +- Don't compute check value for raw inflate if asked to validate +- Handle case where inflateSync used when header never processed +- Avoid the use of ptrdiff_t +- Avoid an undefined behavior of memcpy() in gzappend() +- Avoid undefined behaviors of memcpy() in gz*printf() +- Avoid an undefined behavior of memcpy() in _tr_stored_block() +- Make the names in functions declarations identical to definitions +- Remove old assembler code in which bugs have manifested +- Fix deflateEnd() to not report an error at start of raw deflate +- Add legal disclaimer to README +- Emphasize the need to continue decompressing gzip members +- Correct the initialization requirements for deflateInit2() +- Fix a bug that can crash deflate on some input when using Z_FIXED +- Assure that the number of bits for deflatePrime() is valid +- Use a structure to make globals in enough.c evident +- Use a macro for the printf format of big_t in enough.c +- Clean up code style in enough.c, update version +- Use inline function instead of macro for index in enough.c +- Clarify that prefix codes are counted in enough.c +- Show all the codes for the maximum tables size in enough.c +- Add gznorm.c example, which normalizes gzip files +- Fix the zran.c example to work on a multiple-member gzip file +- Add tables for crc32_combine(), to speed it up by a factor of 200 +- Add crc32_combine_gen() and crc32_combine_op() for fast combines +- Speed up software CRC-32 computation by a factor of 1.5 to 3 +- Use atomic test and set, if available, for dynamic CRC tables +- Don't bother computing check value after successful inflateSync() +- Correct comment in crc32.c +- Add use of the ARMv8 crc32 instructions when requested +- Use ARM crc32 instructions if the ARM architecture has them +- Explicitly note that the 32-bit check values are 32 bits +- Avoid adding empty gzip member after gzflush with Z_FINISH +- Fix memory leak on error in gzlog.c +- Fix error in comment on the polynomial representation of a byte +- Clarify gz* function interfaces, referring to parameter names +- Change macro name in inflate.c to avoid collision in VxWorks +- Correct typo in blast.c +- Improve portability of contrib/minizip +- Fix indentation in minizip's zip.c +- Replace black/white with allow/block. (theresa-m) +- minizip warning fix if MAXU32 already defined. (gvollant) +- Fix unztell64() in minizip to work past 4GB. (Daniël Hörchner) +- Clean up minizip to reduce warnings for testing +- Add fallthrough comments for gcc +- Eliminate use of ULL constants +- Separate out address sanitizing from warnings in configure +- Remove destructive aspects of make distclean +- Check for cc masquerading as gcc or clang in configure +- Fix crc32.c to compile local functions only if used + Changes in 1.2.11 (15 Jan 2017) - Fix deflate stored bug when pulling last block from window - Permit immediate deflateParams changes before any deflate input @@ -511,7 +574,7 @@ Changes in 1.2.3.5 (8 Jan 2010) - Don't use _vsnprintf on later versions of MSVC [Lowman] - Add CMake build script and input file [Lowman] - Update contrib/minizip to 1.1 [Svensson, Vollant] -- Moved nintendods directory from contrib to . +- Moved nintendods directory from contrib to root - Replace gzio.c with a new set of routines with the same functionality - Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above - Update contrib/minizip to 1.1b @@ -685,7 +748,7 @@ Changes in 1.2.2.4 (11 July 2005) - Be more strict on incomplete code sets in inflate_table() and increase ENOUGH and MAXD -- this repairs a possible security vulnerability for invalid inflate input. Thanks to Tavis Ormandy and Markus Oberhumer for - discovering the vulnerability and providing test cases. + discovering the vulnerability and providing test cases - Add ia64 support to configure for HP-UX [Smith] - Add error return to gzread() for format or i/o error [Levin] - Use malloc.h for OS/2 [Necasek] @@ -721,7 +784,7 @@ Changes in 1.2.2.2 (30 December 2004) - Add Z_FIXED strategy option to deflateInit2() to force fixed trees - Add updated make_vms.com [Coghlan], update README - Create a new "examples" directory, move gzappend.c there, add zpipe.c, - fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html. + fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html - Add FAQ entry and comments in deflate.c on uninitialized memory access - Add Solaris 9 make options in configure [Gilbert] - Allow strerror() usage in gzio.c for STDC @@ -792,7 +855,7 @@ Changes in 1.2.1.1 (9 January 2004) - Fix a big fat bug in inftrees.c that prevented decoding valid dynamic blocks with only literals and no distance codes -- Thanks to "Hot Emu" for the bug report and sample file -- Add a note to puff.c on no distance codes case. +- Add a note to puff.c on no distance codes case Changes in 1.2.1 (17 November 2003) - Remove a tab in contrib/gzappend/gzappend.c @@ -1036,14 +1099,14 @@ Changes in 1.2.0 (9 March 2003) - Add contrib/puff/ simple inflate for deflate format description Changes in 1.1.4 (11 March 2002) -- ZFREE was repeated on same allocation on some error conditions. +- ZFREE was repeated on same allocation on some error conditions This creates a security problem described in http://www.zlib.org/advisory-2002-03-11.txt - Returned incorrect error (Z_MEM_ERROR) on some invalid data - Avoid accesses before window for invalid distances with inflate window - less than 32K. + less than 32K - force windowBits > 8 to avoid a bug in the encoder for a window size - of 256 bytes. (A complete fix will be available in 1.1.5). + of 256 bytes. (A complete fix will be available in 1.1.5) Changes in 1.1.3 (9 July 1998) - fix "an inflate input buffer bug that shows up on rare but persistent @@ -1117,7 +1180,7 @@ Changes in 1.1.1 (27 Feb 98) - remove block truncation heuristic which had very marginal effect for zlib (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the compression ratio on some files. This also allows inlining _tr_tally for - matches in deflate_slow. + matches in deflate_slow - added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier) Changes in 1.1.0 (24 Feb 98) @@ -1162,7 +1225,7 @@ Changes in 1.0.8 (27 Jan 1998) - include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong) - use constant arrays for the static trees in trees.c instead of computing them at run time (thanks to Ken Raeburn for this suggestion). To create - trees.h, compile with GEN_TREES_H and run "make test". + trees.h, compile with GEN_TREES_H and run "make test" - check return code of example in "make test" and display result - pass minigzip command line options to file_compress - simplifying code of inflateSync to avoid gcc 2.8 bug @@ -1201,12 +1264,12 @@ Changes in 1.0.6 (19 Jan 1998) - add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code) - Fix a deflate bug occurring only with compression level 0 (thanks to - Andy Buckler for finding this one). -- In minigzip, pass transparently also the first byte for .Z files. + Andy Buckler for finding this one) +- In minigzip, pass transparently also the first byte for .Z files - return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress() - check Z_FINISH in inflate (thanks to Marc Schluper) - Implement deflateCopy (thanks to Adam Costello) -- make static libraries by default in configure, add --shared option. +- make static libraries by default in configure, add --shared option - move MSDOS or Windows specific files to directory msdos - suppress the notion of partial flush to simplify the interface (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4) @@ -1218,7 +1281,7 @@ Changes in 1.0.6 (19 Jan 1998) - added Makefile.nt (thanks to Stephen Williams) - added the unsupported "contrib" directory: contrib/asm386/ by Gilles Vollant - 386 asm code replacing longest_match(). + 386 asm code replacing longest_match() contrib/iostream/ by Kevin Ruland A C++ I/O streams interface to the zlib gz* functions contrib/iostream2/ by Tyge Løvset @@ -1226,7 +1289,7 @@ Changes in 1.0.6 (19 Jan 1998) contrib/untgz/ by "Pedro A. Aranda Guti\irrez" A very simple tar.gz file extractor using zlib contrib/visual-basic.txt by Carlos Rios - How to use compress(), uncompress() and the gz* functions from VB. + How to use compress(), uncompress() and the gz* functions from VB - pass params -f (filtered data), -h (huffman only), -1 to -9 (compression level) in minigzip (thanks to Tom Lane) @@ -1235,8 +1298,8 @@ Changes in 1.0.6 (19 Jan 1998) - add undocumented function inflateSyncPoint() (hack for Paul Mackerras) - add undocumented function zError to convert error code to string (for Tim Smithers) -- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code. -- Use default memcpy for Symantec MSDOS compiler. +- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code +- Use default memcpy for Symantec MSDOS compiler - Add EXPORT keyword for check_func (needed for Windows DLL) - add current directory to LD_LIBRARY_PATH for "make test" - create also a link for libz.so.1 @@ -1249,7 +1312,7 @@ Changes in 1.0.6 (19 Jan 1998) - allow compilation with ANSI keywords only enabled for TurboC in large model - avoid "versionString"[0] (Borland bug) - add NEED_DUMMY_RETURN for Borland -- use variable z_verbose for tracing in debug mode (L. Peter Deutsch). +- use variable z_verbose for tracing in debug mode (L. Peter Deutsch) - allow compilation with CC - defined STDC for OS/2 (David Charlap) - limit external names to 8 chars for MVS (Thomas Lund) @@ -1259,7 +1322,7 @@ Changes in 1.0.6 (19 Jan 1998) - use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau) - added makelcc.bat for lcc-win32 (Tom St Denis) - in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe) -- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion. +- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion - check for unistd.h in configure (for off_t) - remove useless check parameter in inflate_blocks_free - avoid useless assignment of s->check to itself in inflate_blocks_new @@ -1280,7 +1343,7 @@ Changes in 1.0.5 (3 Jan 98) Changes in 1.0.4 (24 Jul 96) - In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF bit, so the decompressor could decompress all the correct data but went - on to attempt decompressing extra garbage data. This affected minigzip too. + on to attempt decompressing extra garbage data. This affected minigzip too - zlibVersion and gzerror return const char* (needed for DLL) - port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno) - use z_error only for DEBUG (avoid problem with DLLs) @@ -1310,7 +1373,7 @@ Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion] - fix array overlay in deflate.c which sometimes caused bad compressed data - fix inflate bug with empty stored block - fix MSDOS medium model which was broken in 0.99 -- fix deflateParams() which could generate bad compressed data. +- fix deflateParams() which could generate bad compressed data - Bytef is define'd instead of typedef'ed (work around Borland bug) - added an INDEX file - new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32), @@ -1331,7 +1394,7 @@ Changes in 0.99 (27 Jan 96) - allow preset dictionary shared between compressor and decompressor - allow compression level 0 (no compression) - add deflateParams in zlib.h: allow dynamic change of compression level - and compression strategy. + and compression strategy - test large buffers and deflateParams in example.c - add optional "configure" to build zlib as a shared library - suppress Makefile.qnx, use configure instead @@ -1373,30 +1436,30 @@ Changes in 0.99 (27 Jan 96) - use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc... - use Z_BINARY instead of BINARY - document that gzclose after gzdopen will close the file -- allow "a" as mode in gzopen. +- allow "a" as mode in gzopen - fix error checking in gzread - allow skipping .gz extra-field on pipes - added reference to Perl interface in README - put the crc table in FAR data (I dislike more and more the medium model :) - added get_crc_table -- added a dimension to all arrays (Borland C can't count). +- added a dimension to all arrays (Borland C can't count) - workaround Borland C bug in declaration of inflate_codes_new & inflate_fast - guard against multiple inclusion of *.h (for precompiled header on Mac) -- Watcom C pretends to be Microsoft C small model even in 32 bit mode. +- Watcom C pretends to be Microsoft C small model even in 32 bit mode - don't use unsized arrays to avoid silly warnings by Visual C++: warning C4746: 'inflate_mask' : unsized array treated as '__far' - (what's wrong with far data in far model?). + (what's wrong with far data in far model?) - define enum out of inflate_blocks_state to allow compilation with C++ Changes in 0.95 (16 Aug 95) - fix MSDOS small and medium model (now easier to adapt to any compiler) - inlined send_bits - fix the final (:-) bug for deflate with flush (output was correct but - not completely flushed in rare occasions). + not completely flushed in rare occasions) - default window size is same for compression and decompression - (it's now sufficient to set MAX_WBITS in zconf.h). + (it's now sufficient to set MAX_WBITS in zconf.h) - voidp -> voidpf and voidnp -> voidp (for consistency with other - typedefs and because voidnp was not near in large model). + typedefs and because voidnp was not near in large model) Changes in 0.94 (13 Aug 95) - support MSDOS medium model @@ -1405,12 +1468,12 @@ Changes in 0.94 (13 Aug 95) - added support for VMS - allow a compression level in gzopen() - gzflush now calls fflush -- For deflate with flush, flush even if no more input is provided. +- For deflate with flush, flush even if no more input is provided - rename libgz.a as libz.a - avoid complex expression in infcodes.c triggering Turbo C bug - work around a problem with gcc on Alpha (in INSERT_STRING) - don't use inline functions (problem with some gcc versions) -- allow renaming of Byte, uInt, etc... with #define. +- allow renaming of Byte, uInt, etc... with #define - avoid warning about (unused) pointer before start of array in deflate.c - avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c - avoid reserved word 'new' in trees.c @@ -1429,7 +1492,7 @@ Changes in 0.92 (3 May 95) - no memcpy on Pyramid - suppressed inftest.c - optimized fill_window, put longest_match inline for gcc -- optimized inflate on stored blocks. +- optimized inflate on stored blocks - untabify all sources to simplify patches Changes in 0.91 (2 May 95) @@ -1447,7 +1510,7 @@ Changes in 0.9 (1 May 95) - let again gzread copy uncompressed data unchanged (was working in 0.71) - deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented - added a test of inflateSync in example.c -- moved MAX_WBITS to zconf.h because users might want to change that. +- moved MAX_WBITS to zconf.h because users might want to change that - document explicitly that zalloc(64K) on MSDOS must return a normalized pointer (zero offset) - added Makefiles for Microsoft C, Turbo C, Borland C++ @@ -1456,7 +1519,7 @@ Changes in 0.9 (1 May 95) Changes in 0.8 (29 April 95) - added fast inflate (inffast.c) - deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this - is incompatible with previous versions of zlib which returned Z_OK. + is incompatible with previous versions of zlib which returned Z_OK - work around a TurboC compiler bug (bad code for b << 0, see infutil.h) (actually that was not a compiler bug, see 0.81 above) - gzread no longer reads one extra byte in certain cases @@ -1466,50 +1529,50 @@ Changes in 0.8 (29 April 95) Changes in 0.71 (14 April 95) - Fixed more MSDOS compilation problems :( There is still a bug with - TurboC large model. + TurboC large model Changes in 0.7 (14 April 95) -- Added full inflate support. +- Added full inflate support - Simplified the crc32() interface. The pre- and post-conditioning (one's complement) is now done inside crc32(). WARNING: this is - incompatible with previous versions; see zlib.h for the new usage. + incompatible with previous versions; see zlib.h for the new usage Changes in 0.61 (12 April 95) -- workaround for a bug in TurboC. example and minigzip now work on MSDOS. +- workaround for a bug in TurboC. example and minigzip now work on MSDOS Changes in 0.6 (11 April 95) - added minigzip.c - added gzdopen to reopen a file descriptor as gzFile -- added transparent reading of non-gziped files in gzread. +- added transparent reading of non-gziped files in gzread - fixed bug in gzread (don't read crc as data) -- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose). +- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose) - don't allocate big arrays in the stack (for MSDOS) - fix some MSDOS compilation problems Changes in 0.5: - do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but - not yet Z_FULL_FLUSH. + not yet Z_FULL_FLUSH - support decompression but only in a single step (forced Z_FINISH) -- added opaque object for zalloc and zfree. +- added opaque object for zalloc and zfree - added deflateReset and inflateReset -- added a variable zlib_version for consistency checking. -- renamed the 'filter' parameter of deflateInit2 as 'strategy'. - Added Z_FILTERED and Z_HUFFMAN_ONLY constants. +- added a variable zlib_version for consistency checking +- renamed the 'filter' parameter of deflateInit2 as 'strategy' + Added Z_FILTERED and Z_HUFFMAN_ONLY constants Changes in 0.4: -- avoid "zip" everywhere, use zlib instead of ziplib. +- avoid "zip" everywhere, use zlib instead of ziplib - suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush - if compression method == 8. + if compression method == 8 - added adler32 and crc32 - renamed deflateOptions as deflateInit2, call one or the other but not both -- added the method parameter for deflateInit2. +- added the method parameter for deflateInit2 - added inflateInit2 - simplied considerably deflateInit and inflateInit by not supporting user-provided history buffer. This is supported only in deflateInit2 - and inflateInit2. + and inflateInit2 Changes in 0.3: - prefix all macro names with Z_ -- use Z_FINISH instead of deflateEnd to finish compression. +- use Z_FINISH instead of deflateEnd to finish compression - added Z_HUFFMAN_ONLY - added gzerror() diff --git a/3rdparty/zlib/README b/3rdparty/zlib/README index 51106de475..024b79d3d8 100644 --- a/3rdparty/zlib/README +++ b/3rdparty/zlib/README @@ -1,6 +1,6 @@ ZLIB DATA COMPRESSION LIBRARY -zlib 1.2.11 is a general purpose data compression library. All the code is +zlib 1.2.12 is a general purpose data compression library. All the code is thread safe. The data format used by the zlib library is described by RFCs (Request for Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and @@ -31,7 +31,7 @@ Mark Nelson wrote an article about zlib for the Jan. 1997 issue of Dr. Dobb's Journal; a copy of the article is available at http://marknelson.us/1997/01/01/zlib-engine/ . -The changes made in version 1.2.11 are documented in the file ChangeLog. +The changes made in version 1.2.12 are documented in the file ChangeLog. Unsupported third party contributions are provided in directory contrib/ . @@ -84,7 +84,7 @@ Acknowledgments: Copyright notice: - (C) 1995-2017 Jean-loup Gailly and Mark Adler + (C) 1995-2022 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -108,7 +108,10 @@ Copyright notice: If you use the zlib library in a product, we would appreciate *not* receiving lengthy legal documents to sign. The sources are provided for free but without warranty of any kind. The library has been entirely written by Jean-loup -Gailly and Mark Adler; it does not include third-party code. +Gailly and Mark Adler; it does not include third-party code. We make all +contributions to and distributions of this project solely in our personal +capacity, and are not conveying any rights to any intellectual property of +any third parties. If you redistribute modified sources, we would appreciate that you include in the file ChangeLog history information documenting your changes. Please read diff --git a/3rdparty/zlib/crc32.c b/3rdparty/zlib/crc32.c index 9580440c0e..a1bdce5c23 100644 --- a/3rdparty/zlib/crc32.c +++ b/3rdparty/zlib/crc32.c @@ -1,12 +1,10 @@ /* crc32.c -- compute the CRC-32 of a data stream - * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * - * Thanks to Rodney Brown for his contribution of faster - * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing - * tables for updating the shift register in one step with three exclusive-ors - * instead of four steps with four exclusive-ors. This results in about a - * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. */ /* @(#) $Id$ */ @@ -14,11 +12,12 @@ /* Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore protection on the static variables used to control the first-use generation - of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should first call get_crc_table() to initialize the tables before allowing more than one thread to use crc32(). - DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. + MAKECRCH can be #defined to write out crc32.h. A main() routine is also + produced, so that this one source file can be compiled to an executable. */ #ifdef MAKECRCH @@ -28,175 +27,713 @@ # endif /* !DYNAMIC_CRC_TABLE */ #endif /* MAKECRCH */ -#include "zutil.h" /* for STDC and FAR definitions */ +#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ -/* Definitions for doing the crc four data bytes at a time. */ -#if !defined(NOBYFOUR) && defined(Z_U4) -# define BYFOUR -#endif -#ifdef BYFOUR - local unsigned long crc32_little OF((unsigned long, - const unsigned char FAR *, z_size_t)); - local unsigned long crc32_big OF((unsigned long, - const unsigned char FAR *, z_size_t)); -# define TBLS 8 + /* + A CRC of a message is computed on N braids of words in the message, where + each word consists of W bytes (4 or 8). If N is 3, for example, then three + running sparse CRCs are calculated respectively on each braid, at these + indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... + This is done starting at a word boundary, and continues until as many blocks + of N * W bytes as are available have been processed. The results are combined + into a single CRC at the end. For this code, N must be in the range 1..6 and + W must be 4 or 8. The upper limit on N can be increased if desired by adding + more #if blocks, extending the patterns apparent in the code. In addition, + crc32.h would need to be regenerated, if the maximum N value is increased. + + N and W are chosen empirically by benchmarking the execution time on a given + processor. The choices for N and W below were based on testing on Intel Kaby + Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 + Octeon II processors. The Intel, AMD, and ARM processors were all fastest + with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. + They were all tested with either gcc or clang, all using the -O3 optimization + level. Your mileage may vary. + */ + +/* Define N */ +#ifdef Z_TESTN +# define N Z_TESTN #else -# define TBLS 1 -#endif /* BYFOUR */ +# define N 5 +#endif +#if N < 1 || N > 6 +# error N must be in 1..6 +#endif -/* Local functions for crc concatenation */ -local unsigned long gf2_matrix_times OF((unsigned long *mat, - unsigned long vec)); -local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); -local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); +/* + z_crc_t must be at least 32 bits. z_word_t must be at least as long as + z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and + that bytes are eight bits. + */ +/* + Define W and the associated z_word_t type. If W is not defined, then a + braided calculation is not used, and the associated tables and code are not + compiled. + */ +#ifdef Z_TESTW +# if Z_TESTW-1 != -1 +# define W Z_TESTW +# endif +#else +# ifdef MAKECRCH +# define W 8 /* required for MAKECRCH */ +# else +# if defined(__x86_64__) || defined(__aarch64__) +# define W 8 +# else +# define W 4 +# endif +# endif +#endif +#ifdef W +# if W == 8 && defined(Z_U8) + typedef Z_U8 z_word_t; +# elif defined(Z_U4) +# undef W +# define W 4 + typedef Z_U4 z_word_t; +# else +# undef W +# endif +#endif + +/* Local functions. */ +local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); +local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); + +/* If available, use the ARM processor CRC32 instruction. */ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 +# define ARMCRC32 +#endif + +#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) +/* + Swap the bytes in a z_word_t to convert between little and big endian. Any + self-respecting compiler will optimize this to a single machine byte-swap + instruction, if one is available. This assumes that word_t is either 32 bits + or 64 bits. + */ +local z_word_t byte_swap(word) + z_word_t word; +{ +# if W == 8 + return + (word & 0xff00000000000000) >> 56 | + (word & 0xff000000000000) >> 40 | + (word & 0xff0000000000) >> 24 | + (word & 0xff00000000) >> 8 | + (word & 0xff000000) << 8 | + (word & 0xff0000) << 24 | + (word & 0xff00) << 40 | + (word & 0xff) << 56; +# else /* W == 4 */ + return + (word & 0xff000000) >> 24 | + (word & 0xff0000) >> 8 | + (word & 0xff00) << 8 | + (word & 0xff) << 24; +# endif +} +#endif + +/* CRC polynomial. */ +#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ #ifdef DYNAMIC_CRC_TABLE -local volatile int crc_table_empty = 1; -local z_crc_t FAR crc_table[TBLS][256]; +local z_crc_t FAR crc_table[256]; +local z_crc_t FAR x2n_table[32]; local void make_crc_table OF((void)); +#ifdef W + local z_word_t FAR crc_big_table[256]; + local z_crc_t FAR crc_braid_table[W][256]; + local z_word_t FAR crc_braid_big_table[W][256]; + local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); +#endif #ifdef MAKECRCH - local void write_table OF((FILE *, const z_crc_t FAR *)); + local void write_table OF((FILE *, const z_crc_t FAR *, int)); + local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); + local void write_table64 OF((FILE *, const z_word_t FAR *, int)); #endif /* MAKECRCH */ + +/* + Define a once() function depending on the availability of atomics. If this is + compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in + multiple threads, and if atomics are not available, then get_crc_table() must + be called to initialize the tables and must return before any threads are + allowed to compute or combine CRCs. + */ + +/* Definition of once functionality. */ +typedef struct once_s once_t; +local void once OF((once_t *, void (*)(void))); + +/* Check for the availability of atomics. */ +#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ + !defined(__STDC_NO_ATOMICS__) + +#include + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + atomic_flag begun; + atomic_int done; +}; +#define ONCE_INIT {ATOMIC_FLAG_INIT, 0} + +/* + Run the provided init() function exactly once, even if multiple threads + invoke once() at the same time. The state must be a once_t initialized with + ONCE_INIT. + */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!atomic_load(&state->done)) { + if (atomic_flag_test_and_set(&state->begun)) + while (!atomic_load(&state->done)) + ; + else { + init(); + atomic_store(&state->done, 1); + } + } +} + +#else /* no atomics */ + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + volatile int begun; + volatile int done; +}; +#define ONCE_INIT {0, 0} + +/* Test and set. Alas, not atomic, but tries to minimize the period of + vulnerability. */ +local int test_and_set OF((int volatile *)); +local int test_and_set(flag) + int volatile *flag; +{ + int was; + + was = *flag; + *flag = 1; + return was; +} + +/* Run the provided init() function once. This is not thread-safe. */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!state->done) { + if (test_and_set(&state->begun)) + while (!state->done) + ; + else { + init(); + state->done = 1; + } + } +} + +#endif + +/* State for once(). */ +local once_t made = ONCE_INIT; + /* Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. Polynomials over GF(2) are represented in binary, one bit per coefficient, - with the lowest powers in the most significant bit. Then adding polynomials + with the lowest powers in the most significant bit. Then adding polynomials is just exclusive-or, and multiplying a polynomial by x is a right shift by - one. If we call the above polynomial p, and represent a byte as the + one. If we call the above polynomial p, and represent a byte as the polynomial q, also with the lowest power in the most significant bit (so the - byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, where a mod b means the remainder after dividing a by b. This calculation is done using the shift-register method of multiplying and - taking the remainder. The register is initialized to zero, and for each + taking the remainder. The register is initialized to zero, and for each incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. + */ - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. -*/ local void make_crc_table() { - z_crc_t c; - int n, k; - z_crc_t poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static volatile int first = 1; /* flag to limit concurrent making */ - static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + unsigned i, j, n; + z_crc_t p; - /* See if another task is already doing this (not thread-safe, but better - than nothing -- significantly reduces duration of vulnerability in - case the advice about DYNAMIC_CRC_TABLE is ignored) */ - if (first) { - first = 0; - - /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0; - for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) - poly |= (z_crc_t)1 << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (z_crc_t)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; - } - -#ifdef BYFOUR - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = ZSWAP32(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = ZSWAP32(c); - } - } -#endif /* BYFOUR */ - - crc_table_empty = 0; - } - else { /* not first */ - /* wait for the other guy to finish (not efficient, but rare) */ - while (crc_table_empty) - ; + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? (p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; +#ifdef W + crc_big_table[i] = byte_swap(p); +#endif } + /* initialize the x^2^n mod p(x) table */ + p = (z_crc_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + +#ifdef W + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); +#endif + #ifdef MAKECRCH - /* write out CRC tables to crc32.h */ { + /* + The crc32.h header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. + */ +#if !defined(W) || W != 8 +# error Need a 64-bit integer type in order to generate crc32.h. +#endif FILE *out; + int k, n; + z_crc_t ltl[8][256]; + z_word_t big[8][256]; out = fopen("crc32.h", "w"); if (out == NULL) return; - fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); - fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const z_crc_t FAR "); - fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); - write_table(out, crc_table[0]); -# ifdef BYFOUR - fprintf(out, "#ifdef BYFOUR\n"); - for (k = 1; k < 8; k++) { - fprintf(out, " },\n {\n"); - write_table(out, crc_table[k]); + + /* write out little-endian CRC table to crc32.h */ + fprintf(out, + "/* crc32.h -- tables for rapid CRC calculation\n" + " * Generated automatically by crc32.c\n */\n" + "\n" + "local const z_crc_t FAR crc_table[] = {\n" + " "); + write_table(out, crc_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#ifdef W\n" + "\n" + "#if W == 8\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table64(out, crc_big_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table32hi(out, crc_big_table, 256); + fprintf(out, + "};\n" + "\n" + "#endif\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + fprintf(out, + "\n" + "#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#if W == 8\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table64(out, big[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table32hi(out, big[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "#endif\n" + "\n" + "#endif\n"); } - fprintf(out, "#endif\n"); -# endif /* BYFOUR */ - fprintf(out, " }\n};\n"); + fprintf(out, + "\n" + "#endif\n"); + + /* write out zeros operator table to crc32.h */ + fprintf(out, + "\n" + "local const z_crc_t FAR x2n_table[] = {\n" + " "); + write_table(out, x2n_table, 32); + fprintf(out, + "};\n"); fclose(out); } #endif /* MAKECRCH */ } #ifdef MAKECRCH -local void write_table(out, table) + +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ +local void write_table(out, table, k) FILE *out; const z_crc_t FAR *table; + int k; { int n; - for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", (unsigned long)(table[n]), - n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); } + +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +local void write_table32hi(out, table, k) +FILE *out; +const z_word_t FAR *table; +int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +local void write_table64(out, table, k) + FILE *out; + const z_word_t FAR *table; + int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", + (unsigned long long)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); +} + +/* Actually do the deed. */ +int main() +{ + make_crc_table(); + return 0; +} + #endif /* MAKECRCH */ +#ifdef W +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. + */ +local void braid(ltl, big, n, w) + z_crc_t ltl[][256]; + z_word_t big[][256]; + int n; + int w; +{ + int k; + z_crc_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = byte_swap(q); + } + } +} +#endif + #else /* !DYNAMIC_CRC_TABLE */ /* ======================================================================== - * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + * Tables for byte-wise and braided CRC-32 calculations, and a table of powers + * of x for combining CRC-32s, all made by make_crc_table(). */ #include "crc32.h" #endif /* DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Routines used for CRC calculation. Some are also required for the table + * generation above. + */ + +/* + Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, + reflected. For speed, this requires that a not be zero. + */ +local z_crc_t multmodp(a, b) + z_crc_t a; + z_crc_t b; +{ + z_crc_t m, p; + + m = (z_crc_t)1 << 31; + p = 0; + for (;;) { + if (a & m) { + p ^= b; + if ((a & (m - 1)) == 0) + break; + } + m >>= 1; + b = b & 1 ? (b >> 1) ^ POLY : b >> 1; + } + return p; +} + +/* + Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been + initialized. + */ +local z_crc_t x2nmodp(n, k) + z_off64_t n; + unsigned k; +{ + z_crc_t p; + + p = (z_crc_t)1 << 31; /* x^0 == 1 */ + while (n) { + if (n & 1) + p = multmodp(x2n_table[k & 31], p); + n >>= 1; + k++; + } + return p; +} + /* ========================================================================= - * This function can be used by asm versions of crc32() + * This function can be used by asm versions of crc32(), and to force the + * generation of the CRC tables in a threaded application. */ const z_crc_t FAR * ZEXPORT get_crc_table() { #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ return (const z_crc_t FAR *)crc_table; } -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 +/* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for + * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will + * only be defined if the compilation specifies an ARM processor architecture + * that has the instructions. For example, compiling with -march=armv8.1-a or + * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 + * instructions. + */ +#ifdef ARMCRC32 + +/* + Constants empirically determined to maximize speed. These values are from + measurements on a Cortex-A57. Your mileage may vary. + */ +#define Z_BATCH 3990 /* number of words in a batch */ +#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ +#define Z_BATCH_MIN 800 /* fewest words in a final batch */ + +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + z_crc_t val; + z_word_t crc1, crc2; + const z_word_t *word; + z_word_t val0, val1, val2; + z_size_t last, last2, i; + z_size_t num; + + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc ^= 0xffffffff; + + /* Compute the CRC up to a word boundary. */ + while (len && ((z_size_t)buf & 7) != 0) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ + word = (z_word_t const *)buf; + num = len >> 3; + len &= 7; + + /* Do three interleaved CRCs to realize the throughput of one crc32x + instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three + CRCs are combined into a single CRC after each set of batches. */ + while (num >= 3 * Z_BATCH) { + crc1 = 0; + crc2 = 0; + for (i = 0; i < Z_BATCH; i++) { + val0 = word[i]; + val1 = word[i + Z_BATCH]; + val2 = word[i + 2 * Z_BATCH]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * Z_BATCH; + num -= 3 * Z_BATCH; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; + } + + /* Do one last smaller batch with the remaining words, if there are enough + to pay for the combination of CRCs. */ + last = num / 3; + if (last >= Z_BATCH_MIN) { + last2 = last << 1; + crc1 = 0; + crc2 = 0; + for (i = 0; i < last; i++) { + val0 = word[i]; + val1 = word[i + last]; + val2 = word[i + last2]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * last; + num -= 3 * last; + val = x2nmodp(last, 6); + crc = multmodp(val, crc) ^ crc1; + crc = multmodp(val, crc) ^ crc2; + } + + /* Compute the CRC on any remaining words. */ + for (i = 0; i < num; i++) { + val0 = word[i]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + } + word += num; + + /* Complete the CRC on any remaining bytes. */ + buf = (const unsigned char FAR *)word; + while (len) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; +} + +#else + +#ifdef W + +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +local z_crc_t crc_word(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (z_crc_t)data; +} + +local z_word_t crc_word_big(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} + +#endif /* ========================================================================= */ unsigned long ZEXPORT crc32_z(crc, buf, len) @@ -204,35 +741,324 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) const unsigned char FAR *buf; z_size_t len; { - if (buf == Z_NULL) return 0UL; + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - z_crc_t endian; + /* Pre-condition the CRC */ + crc ^= 0xffffffff; +#ifdef W + + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + z_size_t blks; + z_word_t const *words; + unsigned endian; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((z_size_t)buf & (W - 1)) != 0) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + /* Do endian check at execution time instead of compile time, since ARM + processors can change the endianess at execution time. If the + compiler knows what the endianess will be, it can optimize out the + check and the unused branch. */ endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); + if (*(unsigned char *)&endian) { + /* Little endian. */ + + z_crc_t crc0; + z_word_t word0; +#if N > 1 + z_crc_t crc1; + z_word_t word1; +#if N > 2 + z_crc_t crc2; + z_word_t word2; +#if N > 3 + z_crc_t crc3; + z_word_t word3; +#if N > 4 + z_crc_t crc4; + z_word_t word4; +#if N > 5 + z_crc_t crc5; + z_word_t word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = crc; +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + crc = crc_word(crc0 ^ words[0]); +#if N > 1 + crc = crc_word(crc1 ^ words[1] ^ crc); +#if N > 2 + crc = crc_word(crc2 ^ words[2] ^ crc); +#if N > 3 + crc = crc_word(crc3 ^ words[3] ^ crc); +#if N > 4 + crc = crc_word(crc4 ^ words[4] ^ crc); +#if N > 5 + crc = crc_word(crc5 ^ words[5] ^ crc); +#endif +#endif +#endif +#endif +#endif + words += N; + } + else { + /* Big endian. */ + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = byte_swap(crc); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_big_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_big_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_big_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_big_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_big_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_big_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + comb = crc_word_big(crc0 ^ words[0]); +#if N > 1 + comb = crc_word_big(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word_big(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word_big(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word_big(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word_big(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + crc = byte_swap(comb); + } + + /* + Update the pointer to the remaining bytes to process. + */ + buf = (unsigned char const *)words; } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; + +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. */ while (len >= 8) { - DO8; len -= 8; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; + while (len) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; } +#endif + /* ========================================================================= */ unsigned long ZEXPORT crc32(crc, buf, len) unsigned long crc; @@ -242,186 +1068,16 @@ unsigned long ZEXPORT crc32(crc, buf, len) return crc32_z(crc, buf, len); } -#ifdef BYFOUR - -/* - This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit - integer pointer type. This violates the strict aliasing rule, where a - compiler can assume, for optimization purposes, that two pointers to - fundamentally different types won't ever point to the same memory. This can - manifest as a problem only if one of the pointers is written to. This code - only reads from those pointers. So long as this code remains isolated in - this compilation unit, there won't be a problem. For this reason, this code - should not be copied and pasted into a compilation unit in which other code - writes to the buffer that is passed to these routines. - */ - /* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 - -/* ========================================================================= */ -local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = (z_crc_t)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; -} - -/* ========================================================================= */ -#define DOBIG4 c ^= *buf4++; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 - -/* ========================================================================= */ -local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = ZSWAP32((z_crc_t)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOBIG32; - len -= 32; - } - while (len >= 4) { - DOBIG4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(ZSWAP32(c)); -} - -#endif /* BYFOUR */ - -#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ - -/* ========================================================================= */ -local unsigned long gf2_matrix_times(mat, vec) - unsigned long *mat; - unsigned long vec; -{ - unsigned long sum; - - sum = 0; - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - return sum; -} - -/* ========================================================================= */ -local void gf2_matrix_square(square, mat) - unsigned long *square; - unsigned long *mat; -{ - int n; - - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); -} - -/* ========================================================================= */ -local uLong crc32_combine_(crc1, crc2, len2) +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) uLong crc1; uLong crc2; z_off64_t len2; { - int n; - unsigned long row; - unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ - unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - - /* degenerate case (also disallow negative lengths) */ - if (len2 <= 0) - return crc1; - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ - row = 1; - for (n = 1; n < GF2_DIM; n++) { - odd[n] = row; - row <<= 1; - } - - /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); - - /* apply len2 zeros to crc1 (first square will put the operator for one - zero byte, eight zero bits, in even) */ - do { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - if (len2 == 0) - break; - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - } while (len2 != 0); - - /* return combined crc */ - crc1 ^= crc2; - return crc1; +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return multmodp(x2nmodp(len2, 3), crc1) ^ crc2; } /* ========================================================================= */ @@ -430,13 +1086,31 @@ uLong ZEXPORT crc32_combine(crc1, crc2, len2) uLong crc2; z_off_t len2; { - return crc32_combine_(crc1, crc2, len2); + return crc32_combine64(crc1, crc2, len2); } -uLong ZEXPORT crc32_combine64(crc1, crc2, len2) - uLong crc1; - uLong crc2; +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen64(len2) z_off64_t len2; { - return crc32_combine_(crc1, crc2, len2); +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return x2nmodp(len2, 3); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen(len2) + z_off_t len2; +{ + return crc32_combine_gen64(len2); +} + +/* ========================================================================= */ +uLong crc32_combine_op(crc1, crc2, op) + uLong crc1; + uLong crc2; + uLong op; +{ + return multmodp(op, crc1) ^ crc2; } diff --git a/3rdparty/zlib/crc32.h b/3rdparty/zlib/crc32.h index 9e0c778102..137df68d61 100644 --- a/3rdparty/zlib/crc32.h +++ b/3rdparty/zlib/crc32.h @@ -2,440 +2,9445 @@ * Generated automatically by crc32.c */ -local const z_crc_t FAR crc_table[TBLS][256] = -{ - { - 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, - 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, - 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, - 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, - 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, - 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, - 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, - 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, - 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, - 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, - 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, - 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, - 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, - 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, - 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, - 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, - 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, - 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, - 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, - 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, - 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, - 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, - 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, - 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, - 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, - 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, - 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, - 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, - 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, - 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, - 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, - 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, - 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, - 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, - 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, - 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, - 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, - 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, - 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, - 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, - 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, - 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, - 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, - 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, - 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, - 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, - 0x2d02ef8dUL -#ifdef BYFOUR - }, - { - 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, - 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, - 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, - 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, - 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, - 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, - 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, - 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, - 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, - 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, - 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, - 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, - 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, - 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, - 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, - 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, - 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, - 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, - 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, - 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, - 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, - 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, - 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, - 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, - 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, - 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, - 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, - 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, - 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, - 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, - 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, - 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, - 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, - 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, - 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, - 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, - 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, - 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, - 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, - 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, - 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, - 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, - 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, - 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, - 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, - 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, - 0x9324fd72UL - }, - { - 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, - 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, - 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, - 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, - 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, - 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, - 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, - 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, - 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, - 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, - 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, - 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, - 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, - 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, - 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, - 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, - 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, - 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, - 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, - 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, - 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, - 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, - 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, - 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, - 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, - 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, - 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, - 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, - 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, - 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, - 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, - 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, - 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, - 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, - 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, - 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, - 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, - 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, - 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, - 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, - 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, - 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, - 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, - 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, - 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, - 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, - 0xbe9834edUL - }, - { - 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, - 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, - 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, - 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, - 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, - 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, - 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, - 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, - 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, - 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, - 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, - 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, - 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, - 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, - 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, - 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, - 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, - 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, - 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, - 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, - 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, - 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, - 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, - 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, - 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, - 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, - 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, - 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, - 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, - 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, - 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, - 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, - 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, - 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, - 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, - 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, - 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, - 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, - 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, - 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, - 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, - 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, - 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, - 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, - 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, - 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, - 0xde0506f1UL - }, - { - 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, - 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, - 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, - 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, - 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, - 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, - 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, - 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, - 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, - 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, - 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, - 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, - 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, - 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, - 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, - 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, - 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, - 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, - 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, - 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, - 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, - 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, - 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, - 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, - 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, - 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, - 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, - 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, - 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, - 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, - 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, - 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, - 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, - 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, - 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, - 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, - 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, - 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, - 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, - 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, - 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, - 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, - 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, - 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, - 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, - 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, - 0x8def022dUL - }, - { - 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, - 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, - 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, - 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, - 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, - 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, - 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, - 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, - 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, - 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, - 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, - 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, - 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, - 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, - 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, - 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, - 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, - 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, - 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, - 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, - 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, - 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, - 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, - 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, - 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, - 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, - 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, - 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, - 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, - 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, - 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, - 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, - 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, - 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, - 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, - 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, - 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, - 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, - 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, - 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, - 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, - 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, - 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, - 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, - 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, - 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, - 0x72fd2493UL - }, - { - 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, - 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, - 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, - 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, - 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, - 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, - 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, - 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, - 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, - 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, - 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, - 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, - 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, - 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, - 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, - 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, - 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, - 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, - 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, - 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, - 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, - 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, - 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, - 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, - 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, - 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, - 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, - 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, - 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, - 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, - 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, - 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, - 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, - 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, - 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, - 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, - 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, - 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, - 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, - 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, - 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, - 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, - 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, - 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, - 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, - 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, - 0xed3498beUL - }, - { - 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, - 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, - 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, - 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, - 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, - 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, - 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, - 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, - 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, - 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, - 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, - 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, - 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, - 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, - 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, - 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, - 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, - 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, - 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, - 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, - 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, - 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, - 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, - 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, - 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, - 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, - 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, - 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, - 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, - 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, - 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, - 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, - 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, - 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, - 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, - 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, - 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, - 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, - 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, - 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, - 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, - 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, - 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, - 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, - 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, - 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, - 0xf10605deUL +local const z_crc_t FAR crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}; + +#ifdef W + +#if W == 8 + +local const z_word_t FAR crc_big_table[] = { + 0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}; + +#else /* W == 4 */ + +local const z_word_t FAR crc_big_table[] = { + 0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}; + #endif - } -}; + +#if N == 1 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}, + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}, + {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000, + 0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000, + 0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000, + 0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000, + 0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000, + 0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000, + 0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000, + 0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000, + 0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000, + 0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000, + 0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000, + 0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000, + 0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000, + 0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000, + 0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000, + 0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000, + 0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000, + 0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000, + 0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000, + 0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000, + 0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000, + 0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000, + 0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000, + 0x40bb0e8d00000000, 0x83e823a600000000, 0xc2d938bf00000000, + 0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000, + 0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000, + 0x8b62fa6e00000000, 0xca53e17700000000, 0x545dbbba00000000, + 0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000, + 0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000, + 0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000, + 0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000, + 0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000, + 0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000, + 0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000, + 0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000, + 0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000, + 0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000, + 0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000, + 0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000, + 0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000, + 0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000, + 0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000, + 0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000, + 0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000, + 0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000, + 0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000, + 0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000, + 0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000, + 0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000, + 0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000, + 0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000, + 0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000, + 0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000, + 0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000, + 0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000, + 0xac4870d300000000, 0x6f1b5df800000000, 0x2e2a46e100000000, + 0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000, + 0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000, + 0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000, + 0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000, + 0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000, + 0x7f3884ab00000000, 0xb0241c2c00000000, 0xf115073500000000, + 0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000, + 0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000, + 0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000, + 0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000, + 0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000, + 0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000, + 0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000, + 0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000, + 0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000, + 0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000, + 0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000, + 0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000, + 0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000, + 0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000, + 0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000, + 0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000, + 0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000, + 0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000, + 0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000, + 0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000, + 0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000, + 0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000, + 0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000, + 0x72fd249300000000}, + {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000, + 0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000, + 0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000, + 0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000, + 0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000, + 0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000, + 0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000, + 0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000, + 0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000, + 0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000, + 0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000, + 0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000, + 0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000, + 0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000, + 0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000, + 0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000, + 0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000, + 0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000, + 0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000, + 0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000, + 0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000, + 0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000, + 0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000, + 0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000, + 0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000, + 0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000, + 0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000, + 0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000, + 0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000, + 0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000, + 0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000, + 0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000, + 0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000, + 0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000, + 0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000, + 0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000, + 0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000, + 0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000, + 0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000, + 0xbbaa3a5200000000, 0xe2147c5000000000, 0xd57ebe5100000000, + 0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000, + 0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000, + 0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000, + 0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000, + 0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000, + 0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000, + 0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000, + 0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000, + 0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000, + 0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000, + 0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000, + 0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000, + 0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000, + 0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000, + 0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000, + 0x8b9fb3df00000000, 0xd221f5dd00000000, 0xe54b37dc00000000, + 0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000, + 0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000, + 0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000, + 0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000, + 0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000, + 0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000, + 0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000, + 0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000, + 0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000, + 0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000, + 0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000, + 0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000, + 0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000, + 0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000, + 0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000, + 0xdbf7428b00000000, 0x8249048900000000, 0xb523c68800000000, + 0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000, + 0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000, + 0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000, + 0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000, + 0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000, + 0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000, + 0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000, + 0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000, + 0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000, + 0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000, + 0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000, + 0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000, + 0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000, + 0xed3498be00000000}, + {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000, + 0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000, + 0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000, + 0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000, + 0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000, + 0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000, + 0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000, + 0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000, + 0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000, + 0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000, + 0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000, + 0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000, + 0x6938502f00000000, 0x0c5fec9700000000, 0xe2f0598500000000, + 0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000, + 0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000, + 0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000, + 0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000, + 0x4f579ee200000000, 0xf66f497f00000000, 0x9308f5c700000000, + 0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000, + 0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000, + 0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000, + 0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000, + 0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000, + 0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000, + 0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000, + 0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000, + 0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000, + 0xc768b17300000000, 0x29c7046100000000, 0x4ca0b8d900000000, + 0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000, + 0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000, + 0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000, + 0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000, + 0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000, + 0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000, + 0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000, + 0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000, + 0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000, + 0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000, + 0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000, + 0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000, + 0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000, + 0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000, + 0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000, + 0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000, + 0x6c213b6200000000, 0x094687da00000000, 0xe7e932c800000000, + 0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000, + 0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000, + 0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000, + 0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000, + 0x4a4ef5af00000000, 0xf376223200000000, 0x96119e8a00000000, + 0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000, + 0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000, + 0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000, + 0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000, + 0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000, + 0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000, + 0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000, + 0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000, + 0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000, + 0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000, + 0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000, + 0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000, + 0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000, + 0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000, + 0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000, + 0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000, + 0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000, + 0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000, + 0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000, + 0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000, + 0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000, + 0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000, + 0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000, + 0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000, + 0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000, + 0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000, + 0x6fd61d5900000000, 0x0ab1a1e100000000, 0xe41e14f300000000, + 0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000, + 0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000, + 0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000, + 0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000, + 0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000, + 0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000, + 0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000, + 0x1fa9b0cc00000000, 0x7ace0c7400000000, 0x9461b96600000000, + 0xf10605de00000000}, + {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000, + 0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000, + 0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000, + 0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000, + 0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000, + 0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000, + 0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000, + 0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000, + 0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000, + 0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000, + 0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000, + 0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000, + 0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000, + 0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000, + 0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000, + 0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000, + 0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000, + 0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000, + 0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000, + 0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000, + 0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000, + 0x572f712300000000, 0x4958f35800000000, 0xf971936500000000, + 0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000, + 0x39d7139000000000, 0xe9adb3d700000000, 0x5984d3ea00000000, + 0x8813836800000000, 0x383ae35500000000, 0xe840431200000000, + 0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000, + 0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000, + 0x7be6730500000000, 0xab9cd34200000000, 0x1bb5b37f00000000, + 0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000, + 0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000, + 0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000, + 0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000, + 0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000, + 0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000, + 0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000, + 0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000, + 0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000, + 0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000, + 0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000, + 0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000, + 0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000, + 0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000, + 0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000, + 0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000, + 0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000, + 0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000, + 0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000, + 0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000, + 0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000, + 0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000, + 0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000, + 0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000, + 0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000, + 0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000, + 0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000, + 0xe610c7b800000000, 0x366a67ff00000000, 0x864307c200000000, + 0x57d4574000000000, 0xe7fd377d00000000, 0x3787973a00000000, + 0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000, + 0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000, + 0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000, + 0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000, + 0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000, + 0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000, + 0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000, + 0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000, + 0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000, + 0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000, + 0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000, + 0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000, + 0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000, + 0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000, + 0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000, + 0x983485b900000000, 0x281de58400000000, 0xf86745c300000000, + 0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000, + 0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000, + 0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000, + 0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000, + 0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000, + 0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000, + 0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000, + 0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000, + 0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000, + 0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000, + 0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000, + 0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000, + 0x8cc764ca00000000}, + {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000, + 0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000, + 0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000, + 0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000, + 0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000, + 0xd5b256f100000000, 0x9b02d60300000000, 0x3ed18ac800000000, + 0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000, + 0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000, + 0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000, + 0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000, + 0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000, + 0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000, + 0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000, + 0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000, + 0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000, + 0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000, + 0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000, + 0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000, + 0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000, + 0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000, + 0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000, + 0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000, + 0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000, + 0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000, + 0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000, + 0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000, + 0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000, + 0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000, + 0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000, + 0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000, + 0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000, + 0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000, + 0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000, + 0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000, + 0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000, + 0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000, + 0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000, + 0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000, + 0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000, + 0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000, + 0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000, + 0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000, + 0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000, + 0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000, + 0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000, + 0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000, + 0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000, + 0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000, + 0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000, + 0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000, + 0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000, + 0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000, + 0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000, + 0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000, + 0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000, + 0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000, + 0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000, + 0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000, + 0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000, + 0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000, + 0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000, + 0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000, + 0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000, + 0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000, + 0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000, + 0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000, + 0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000, + 0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000, + 0xcfde2a6600000000, 0x6a0d76ad00000000, 0xc47fe22b00000000, + 0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000, + 0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000, + 0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000, + 0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000, + 0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000, + 0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000, + 0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000, + 0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000, + 0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000, + 0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000, + 0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000, + 0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000, + 0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000, + 0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000, + 0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000, + 0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000, + 0xccabc4e400000000}, + {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000, + 0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000, + 0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000, + 0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000, + 0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000, + 0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000, + 0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000, + 0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000, + 0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000, + 0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000, + 0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000, + 0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000, + 0x8f1df23b00000000, 0x3b16859d00000000, 0xa60c6dac00000000, + 0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000, + 0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000, + 0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000, + 0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000, + 0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000, + 0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000, + 0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000, + 0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000, + 0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000, + 0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000, + 0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000, + 0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000, + 0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000, + 0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000, + 0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000, + 0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000, + 0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000, + 0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000, + 0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000, + 0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000, + 0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000, + 0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000, + 0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000, + 0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000, + 0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000, + 0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000, + 0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000, + 0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000, + 0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000, + 0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000, + 0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000, + 0xe1ce597c00000000, 0x55c52eda00000000, 0xc8dfc6eb00000000, + 0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000, + 0xbcb5662c00000000, 0x08be118a00000000, 0x8680b64f00000000, + 0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000, + 0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000, + 0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000, + 0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000, + 0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000, + 0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000, + 0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000, + 0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000, + 0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000, + 0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000, + 0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000, + 0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000, + 0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000, + 0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000, + 0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000, + 0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000, + 0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000, + 0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000, + 0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000, + 0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000, + 0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000, + 0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000, + 0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000, + 0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000, + 0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000, + 0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000, + 0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000, + 0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000, + 0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000, + 0x0482eff700000000, 0xb089985100000000, 0x2d93706000000000, + 0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000, + 0x59f9d0a700000000, 0xedf2a70100000000, 0x63cc00c400000000, + 0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000, + 0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000, + 0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000, + 0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000, + 0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000, + 0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000, + 0x304a369200000000}, + {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000, + 0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000, + 0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000, + 0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000, + 0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000, + 0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000, + 0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000, + 0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000, + 0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000, + 0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000, + 0x1923316900000000, 0x87239ba500000000, 0x566276f900000000, + 0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000, + 0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000, + 0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000, + 0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000, + 0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000, + 0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000, + 0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000, + 0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000, + 0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000, + 0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000, + 0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000, + 0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000, + 0x89cc7d6100000000, 0x6acbf2ef00000000, 0xf4cb582300000000, + 0x58d978fa00000000, 0xc6d9d23600000000, 0x25de5db800000000, + 0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000, + 0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000, + 0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000, + 0x3cfd6cd100000000, 0xa2fdc61d00000000, 0x41fa499300000000, + 0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000, + 0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000, + 0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000, + 0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000, + 0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000, + 0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000, + 0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000, + 0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000, + 0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000, + 0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000, + 0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000, + 0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000, + 0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000, + 0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000, + 0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000, + 0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000, + 0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000, + 0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000, + 0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000, + 0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000, + 0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000, + 0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000, + 0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000, + 0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000, + 0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000, + 0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000, + 0xe8e9ade200000000, 0x0bee226c00000000, 0x95ee88a000000000, + 0x39fca87900000000, 0xa7fc02b500000000, 0x44fb8d3b00000000, + 0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000, + 0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000, + 0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000, + 0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000, + 0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000, + 0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000, + 0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000, + 0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000, + 0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000, + 0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000, + 0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000, + 0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000, + 0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000, + 0x6171384400000000, 0xff71928800000000, 0xe678578200000000, + 0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000, + 0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000, + 0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000, + 0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000, + 0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000, + 0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000, + 0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000, + 0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000, + 0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000, + 0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000, + 0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000, + 0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000, + 0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000, + 0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000, + 0xe6064b2600000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}, + {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, + 0x45f4777d, 0x86a75a56, 0xc796414f, 0x088ad9c8, 0x49bbc2d1, + 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, + 0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, + 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82, + 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, + 0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7, + 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da, + 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, + 0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6, + 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, + 0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, + 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0, + 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, + 0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, + 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177, + 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, + 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b, + 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, + 0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, + 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad, + 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, + 0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d, + 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60, + 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, + 0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822, + 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, + 0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, + 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc, 0x1a8a4171, + 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, + 0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b, + 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6, + 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, + 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 0xe136de66, 0xa007c57f, + 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, + 0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, + 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c, + 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, + 0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, + 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84, + 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, + 0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398, + 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, + 0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, + 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e, + 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, + 0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4, + 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9, + 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, + 0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5, + 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, + 0x72fd2493}, + {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, + 0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f, + 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, + 0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, + 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512, + 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, + 0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b, + 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d, + 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, + 0xb3d59530, 0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925, + 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, + 0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, + 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70, + 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, + 0x72f11774, 0x459bd575, 0x78dc897e, 0x4fb64b7f, 0x16080d7d, + 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b, + 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, + 0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63, + 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, + 0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, + 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446, + 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, + 0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557, + 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51, + 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, + 0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0, + 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, + 0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, + 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd, + 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, + 0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0, + 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6, + 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, + 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6, + 0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, + 0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, + 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb, + 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, + 0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92, + 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094, + 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598, + 0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c, + 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, + 0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, + 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9, + 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, + 0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 0x765575a4, + 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2, + 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, + 0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba, + 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, + 0xed3498be}, + {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, + 0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d, + 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, + 0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, + 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95, + 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, + 0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, + 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d, + 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, + 0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748, + 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, + 0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, + 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b, + 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, + 0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, + 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3, + 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, + 0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6, + 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, + 0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, + 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe, + 0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, + 0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1, + 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6, + 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, + 0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555, + 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, + 0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a, + 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd, + 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, + 0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, + 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5, + 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, + 0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330, + 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, + 0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, + 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8, + 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, + 0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, + 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb, + 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c, + 0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e, + 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, + 0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, + 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6, + 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, + 0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, + 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e, + 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, + 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb, + 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, + 0xf10605de}}; + +#endif + +#endif + +#if N == 2 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}, + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000, + 0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000, + 0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000, + 0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000, + 0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000, + 0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000, + 0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000, + 0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000, + 0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000, + 0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000, + 0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000, + 0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000, + 0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000, + 0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000, + 0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000, + 0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000, + 0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000, + 0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000, + 0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000, + 0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000, + 0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000, + 0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000, + 0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000, + 0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000, + 0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000, + 0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000, + 0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000, + 0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000, + 0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000, + 0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000, + 0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000, + 0x21a61d8300000000, 0xe49a90ba00000000, 0xa78eebad00000000, + 0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000, + 0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000, + 0x2ef8792f00000000, 0x6dec023800000000, 0xbc23bbe700000000, + 0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000, + 0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000, + 0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000, + 0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000, + 0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000, + 0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000, + 0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000, + 0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000, + 0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000, + 0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000, + 0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000, + 0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000, + 0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000, + 0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000, + 0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000, + 0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000, + 0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000, + 0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000, + 0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000, + 0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000, + 0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000, + 0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000, + 0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000, + 0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000, + 0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000, + 0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000, + 0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000, + 0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000, + 0xca272bb900000000, 0x0f1ba68000000000, 0x4c0fdd9700000000, + 0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000, + 0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000, + 0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000, + 0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000, + 0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000, + 0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000, + 0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000, + 0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000, + 0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000, + 0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000, + 0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000, + 0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000, + 0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000, + 0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000, + 0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000, + 0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000, + 0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000, + 0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000, + 0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000, + 0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000, + 0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000, + 0x4b0c4f4900000000}, + {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000, + 0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000, + 0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000, + 0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000, + 0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000, + 0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000, + 0xd5515b2300000000, 0xeb3a99cc00000000, 0x9221452e00000000, + 0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000, + 0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000, + 0x1f7a4edf00000000, 0x6661923d00000000, 0x580a50d200000000, + 0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000, + 0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000, + 0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000, + 0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000, + 0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000, + 0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000, + 0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000, + 0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000, + 0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000, + 0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000, + 0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000, + 0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000, + 0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000, + 0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000, + 0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000, + 0x57fc5a6600000000, 0x2ee7868400000000, 0x108c446b00000000, + 0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000, + 0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000, + 0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000, + 0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000, + 0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000, + 0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000, + 0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000, + 0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000, + 0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000, + 0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000, + 0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000, + 0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000, + 0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000, + 0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000, + 0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000, + 0x6f7ea90e00000000, 0x166575ec00000000, 0x280eb70300000000, + 0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000, + 0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000, + 0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000, + 0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000, + 0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000, + 0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000, + 0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000, + 0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000, + 0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000, + 0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000, + 0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000, + 0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000, + 0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000, + 0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000, + 0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000, + 0x26f1b85100000000, 0x5fea64b300000000, 0x6181a65c00000000, + 0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000, + 0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000, + 0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000, + 0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000, + 0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000, + 0x8900087b00000000, 0x8abb3f9000000000, 0xb4d0fd7f00000000, + 0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000, + 0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000, + 0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000, + 0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000, + 0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000, + 0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000, + 0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000, + 0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000, + 0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000, + 0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000, + 0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000, + 0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000, + 0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000, + 0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000, + 0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000, + 0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000, + 0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000, + 0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000, + 0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000, + 0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000, + 0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000, + 0x14d747e100000000}, + {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000, + 0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000, + 0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000, + 0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000, + 0xc7921a0a00000000, 0x074d94cb00000000, 0x062b765200000000, + 0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000, + 0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000, + 0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000, + 0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000, + 0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000, + 0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000, + 0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000, + 0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000, + 0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000, + 0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000, + 0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000, + 0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000, + 0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000, + 0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000, + 0x9975275300000000, 0x9813c5ca00000000, 0x58cc4b0b00000000, + 0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000, + 0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000, + 0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000, + 0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000, + 0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000, + 0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000, + 0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000, + 0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000, + 0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000, + 0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000, + 0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000, + 0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000, + 0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000, + 0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000, + 0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000, + 0xbda8992b00000000, 0xbcce7bb200000000, 0x7c11f57300000000, + 0xff04ce5a00000000, 0x3fdb409b00000000, 0x3ebda20200000000, + 0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000, + 0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000, + 0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000, + 0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000, + 0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000, + 0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000, + 0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000, + 0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000, + 0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000, + 0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000, + 0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000, + 0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000, + 0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000, + 0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000, + 0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000, + 0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000, + 0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000, + 0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000, + 0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000, + 0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000, + 0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000, + 0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000, + 0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000, + 0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000, + 0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000, + 0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000, + 0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000, + 0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000, + 0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000, + 0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000, + 0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000, + 0xb7beb3ab00000000, 0x77613d6a00000000, 0x7607dff300000000, + 0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000, + 0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000, + 0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000, + 0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000, + 0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000, + 0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000, + 0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000, + 0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000, + 0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000, + 0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000, + 0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000, + 0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000, + 0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000, + 0x2fad766100000000, 0xef72f8a000000000, 0x2986003300000000, + 0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000, + 0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000, + 0xaa933b1a00000000}, + {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000, + 0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000, + 0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000, + 0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000, + 0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000, + 0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000, + 0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000, + 0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000, + 0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000, + 0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000, + 0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000, + 0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000, + 0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000, + 0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000, + 0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000, + 0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000, + 0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000, + 0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000, + 0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000, + 0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000, + 0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000, + 0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000, + 0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000, + 0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000, + 0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000, + 0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000, + 0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000, + 0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000, + 0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000, + 0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000, + 0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000, + 0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000, + 0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000, + 0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000, + 0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000, + 0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000, + 0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000, + 0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000, + 0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000, + 0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000, + 0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000, + 0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000, + 0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000, + 0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000, + 0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000, + 0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000, + 0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000, + 0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000, + 0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000, + 0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000, + 0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000, + 0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000, + 0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000, + 0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000, + 0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000, + 0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000, + 0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000, + 0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000, + 0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000, + 0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000, + 0x9b62e87700000000, 0xf42e4dec00000000, 0x04fcd39b00000000, + 0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000, + 0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000, + 0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000, + 0x10d8a45000000000, 0x7f9401cb00000000, 0x8f469fbc00000000, + 0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000, + 0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000, + 0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000, + 0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000, + 0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000, + 0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000, + 0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000, + 0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000, + 0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000, + 0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000, + 0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000, + 0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000, + 0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000, + 0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000, + 0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000, + 0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000, + 0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000, + 0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000, + 0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000, + 0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000, + 0x6571193600000000}, + {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000, + 0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000, + 0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000, + 0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000, + 0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000, + 0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000, + 0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000, + 0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000, + 0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000, + 0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000, + 0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000, + 0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000, + 0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000, + 0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000, + 0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000, + 0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000, + 0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000, + 0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000, + 0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000, + 0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000, + 0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000, + 0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000, + 0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000, + 0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000, + 0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000, + 0x082e445a00000000, 0x5028372700000000, 0xd5f1a1fa00000000, + 0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000, + 0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000, + 0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000, + 0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000, + 0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000, + 0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000, + 0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000, + 0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000, + 0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000, + 0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000, + 0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000, + 0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000, + 0xcfc9a8da00000000, 0x4a103e0700000000, 0x12164d7a00000000, + 0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000, + 0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000, + 0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000, + 0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000, + 0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000, + 0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000, + 0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000, + 0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000, + 0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000, + 0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000, + 0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000, + 0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000, + 0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000, + 0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000, + 0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000, + 0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000, + 0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000, + 0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000, + 0x41d1ddf400000000, 0x19d7ae8900000000, 0x9c0e385400000000, + 0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000, + 0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000, + 0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000, + 0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000, + 0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000, + 0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000, + 0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000, + 0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000, + 0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000, + 0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000, + 0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000, + 0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000, + 0x2da7bcc900000000, 0xa87e2a1400000000, 0xf078596900000000, + 0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000, + 0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000, + 0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000, + 0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000, + 0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000, + 0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000, + 0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000, + 0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000, + 0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000, + 0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000, + 0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000, + 0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000, + 0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000, + 0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000, + 0xa68cee3d00000000}, + {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000, + 0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000, + 0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000, + 0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000, + 0x2d91f52a00000000, 0x5b70fab700000000, 0x80559bcb00000000, + 0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000, + 0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000, + 0x015211e200000000, 0xda77709e00000000, 0xac967f0300000000, + 0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000, + 0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000, + 0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000, + 0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000, + 0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000, + 0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000, + 0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000, + 0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000, + 0xb444d6ab00000000, 0xc2a5d93600000000, 0x1980b84a00000000, + 0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000, + 0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000, + 0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000, + 0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000, + 0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000, + 0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000, + 0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000, + 0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000, + 0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000, + 0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000, + 0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000, + 0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000, + 0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000, + 0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000, + 0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000, + 0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000, + 0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000, + 0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000, + 0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000, + 0x041e28a600000000, 0x72ff273b00000000, 0xa9da464700000000, + 0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000, + 0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000, + 0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000, + 0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000, + 0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000, + 0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000, + 0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000, + 0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000, + 0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000, + 0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000, + 0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000, + 0x8ed3c1e500000000, 0xf832ce7800000000, 0x2317af0400000000, + 0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000, + 0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000, + 0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000, + 0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000, + 0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000, + 0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000, + 0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000, + 0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000, + 0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000, + 0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000, + 0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000, + 0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000, + 0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000, + 0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000, + 0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000, + 0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000, + 0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000, + 0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000, + 0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000, + 0x3e893fe800000000, 0x4868307500000000, 0x934d510900000000, + 0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000, + 0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000, + 0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000, + 0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000, + 0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000, + 0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000, + 0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000, + 0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000, + 0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000, + 0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000, + 0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000, + 0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000, + 0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000, + 0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000, + 0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000, + 0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000, + 0x51e8883f00000000}, + {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000, + 0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000, + 0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000, + 0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000, + 0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000, + 0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000, + 0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000, + 0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000, + 0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000, + 0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000, + 0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000, + 0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000, + 0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000, + 0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000, + 0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000, + 0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000, + 0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000, + 0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000, + 0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000, + 0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000, + 0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000, + 0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000, + 0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000, + 0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000, + 0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000, + 0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000, + 0x0863882700000000, 0xe0b8739e00000000, 0x32e4998e00000000, + 0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000, + 0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000, + 0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000, + 0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000, + 0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000, + 0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000, + 0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000, + 0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000, + 0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000, + 0x07eccb0700000000, 0xef3730be00000000, 0x965d4daf00000000, + 0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000, + 0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000, + 0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000, + 0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000, + 0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000, + 0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000, + 0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000, + 0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000, + 0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000, + 0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000, + 0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000, + 0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000, + 0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000, + 0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000, + 0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000, + 0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000, + 0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000, + 0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000, + 0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000, + 0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000, + 0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000, + 0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000, + 0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000, + 0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000, + 0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000, + 0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000, + 0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000, + 0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000, + 0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000, + 0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000, + 0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000, + 0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000, + 0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000, + 0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000, + 0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000, + 0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000, + 0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000, + 0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000, + 0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000, + 0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000, + 0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000, + 0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000, + 0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000, + 0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000, + 0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000, + 0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000, + 0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000, + 0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000, + 0x8ae9531c00000000}, + {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000, + 0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000, + 0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000, + 0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000, + 0xc8d322a700000000, 0x59424a0900000000, 0xabf6822000000000, + 0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000, + 0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000, + 0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000, + 0xd1a1349500000000, 0x40305c3b00000000, 0xb284941200000000, + 0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000, + 0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000, + 0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000, + 0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000, + 0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000, + 0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000, + 0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000, + 0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000, + 0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000, + 0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000, + 0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000, + 0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000, + 0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000, + 0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000, + 0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000, + 0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000, + 0x47dda57400000000, 0x32255c8900000000, 0xa3b4342700000000, + 0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000, + 0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000, + 0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000, + 0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000, + 0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000, + 0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000, + 0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000, + 0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000, + 0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000, + 0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000, + 0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000, + 0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000, + 0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000, + 0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000, + 0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000, + 0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000, + 0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000, + 0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000, + 0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000, + 0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000, + 0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000, + 0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000, + 0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000, + 0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000, + 0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000, + 0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000, + 0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000, + 0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000, + 0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000, + 0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000, + 0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000, + 0x34bd191100000000, 0x4145e0ec00000000, 0xd0d4884200000000, + 0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000, + 0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000, + 0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000, + 0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000, + 0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000, + 0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000, + 0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000, + 0xfcaf4b5b00000000, 0x8957b2a600000000, 0x18c6da0800000000, + 0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000, + 0xd015f8af00000000, 0x22a1308600000000, 0xb330582800000000, + 0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000, + 0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000, + 0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000, + 0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000, + 0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000, + 0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000, + 0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000, + 0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000, + 0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000, + 0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000, + 0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000, + 0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000, + 0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000, + 0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000, + 0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000, + 0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000, + 0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000, + 0xd739710d00000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, + 0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d, + 0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, + 0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, + 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050, + 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, + 0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb, + 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173, + 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, + 0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c, + 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, + 0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, + 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358, + 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, + 0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312, + 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da, + 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, + 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335, + 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, + 0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, + 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9, + 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, + 0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, + 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b, + 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, + 0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c, + 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, + 0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, + 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1, + 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, + 0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b, + 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653, + 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, + 0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d, + 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, + 0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, + 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720, + 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, + 0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, + 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b, + 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, + 0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4, + 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, + 0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, + 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428, + 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, + 0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462, + 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa, + 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, + 0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445, + 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, + 0x8cc764ca}, + {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, + 0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27, + 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, + 0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285, + 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef, + 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, + 0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a, + 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a, + 0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70, + 0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf, + 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, + 0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, + 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f, + 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, + 0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae, + 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe, + 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, + 0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b, + 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, + 0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, + 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4, + 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, + 0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46, + 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716, + 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, + 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5, + 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, + 0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, + 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d, + 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, + 0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc, + 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec, + 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, + 0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e, + 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, + 0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, + 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6, + 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, + 0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c, + 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c, + 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, + 0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9, + 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, + 0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, + 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416, + 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, + 0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, + 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7, + 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, + 0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32, + 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, + 0xccabc4e4}, + {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, + 0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895, + 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, + 0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, + 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154, + 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, + 0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258, + 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a, + 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, + 0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e, + 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, + 0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, + 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44, + 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, + 0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0, + 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2, + 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, + 0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6, + 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, + 0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, + 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e, 0x820259b8, + 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, + 0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, + 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29, + 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, + 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e, + 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, + 0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, + 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef, + 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, + 0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b, + 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019, + 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, + 0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2, + 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, + 0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, + 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13, + 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, + 0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, + 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09, + 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, + 0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d, + 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, + 0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, + 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003, + 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, + 0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7, + 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5, + 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, + 0x7e154636, 0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1, + 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, + 0x304a3692}, + {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84, + 0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f, + 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, + 0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, + 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf, + 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, + 0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb, + 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3, + 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, + 0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749, + 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, + 0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, + 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29, + 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, + 0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8, + 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0, + 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, + 0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a, + 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, + 0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, + 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03, + 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, + 0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee, + 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6, + 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, + 0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f, + 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, + 0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, + 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f, + 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, + 0x76bdefe9, 0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be, + 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6, + 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e, + 0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5, + 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, + 0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, + 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305, + 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, + 0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338, + 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370, + 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, + 0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca, + 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, + 0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, + 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083, + 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, + 0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012, + 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a, + 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, + 0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0, + 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea, + 0xe6064b26}}; + +#endif + +#endif + +#if N == 3 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}, + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000, + 0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000, + 0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000, + 0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000, + 0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000, + 0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000, + 0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000, + 0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000, + 0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000, + 0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000, + 0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000, + 0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000, + 0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000, + 0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000, + 0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000, + 0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000, + 0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000, + 0x03f0686a00000000, 0x488cfa4a00000000, 0x0b475ccd00000000, + 0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000, + 0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000, + 0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000, + 0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000, + 0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000, + 0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000, + 0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000, + 0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000, + 0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000, + 0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000, + 0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000, + 0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000, + 0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000, + 0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000, + 0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000, + 0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000, + 0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000, + 0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000, + 0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000, + 0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000, + 0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000, + 0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000, + 0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000, + 0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000, + 0xe536902500000000, 0xa6fd36a200000000, 0xe8cb8cba00000000, + 0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000, + 0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000, + 0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000, + 0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000, + 0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000, + 0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000, + 0x110947fe00000000, 0x5a75d5de00000000, 0x19be735900000000, + 0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000, + 0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000, + 0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000, + 0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000, + 0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000, + 0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000, + 0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000, + 0x08eda52100000000, 0x4391370100000000, 0x005a918600000000, + 0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000, + 0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000, + 0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000, + 0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000, + 0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000, + 0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000, + 0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000, + 0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000, + 0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000, + 0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000, + 0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000, + 0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000, + 0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000, + 0xeddb350400000000, 0x6980af5700000000, 0x2a4b09d000000000, + 0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000, + 0x7b23114500000000, 0x305f836500000000, 0x739425e200000000, + 0xf7cfbfb100000000, 0xb404193600000000, 0xe69c69c900000000, + 0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000, + 0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000, + 0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000, + 0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000, + 0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000, + 0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000, + 0x1f5ea28d00000000, 0x542230ad00000000, 0x17e9962a00000000, + 0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000, + 0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000, + 0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000, + 0x4e36ba1800000000}, + {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000, + 0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000, + 0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000, + 0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000, + 0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000, + 0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000, + 0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000, + 0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000, + 0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000, + 0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000, + 0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000, + 0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000, + 0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000, + 0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000, + 0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000, + 0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000, + 0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000, + 0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000, + 0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000, + 0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000, + 0x229d1c6f00000000, 0xc32b4e8000000000, 0xa1f6c86a00000000, + 0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000, + 0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000, + 0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000, + 0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000, + 0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000, + 0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000, + 0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000, + 0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000, + 0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000, + 0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000, + 0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000, + 0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000, + 0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000, + 0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000, + 0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000, + 0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000, + 0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000, + 0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000, + 0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000, + 0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000, + 0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000, + 0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000, + 0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000, + 0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000, + 0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000, + 0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000, + 0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000, + 0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000, + 0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000, + 0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000, + 0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000, + 0x93c3319c00000000, 0x7275637300000000, 0x10a8e59900000000, + 0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000, + 0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000, + 0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000, + 0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000, + 0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000, + 0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000, + 0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000, + 0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000, + 0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000, + 0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000, + 0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000, + 0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000, + 0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000, + 0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000, + 0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000, + 0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000, + 0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000, + 0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000, + 0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000, + 0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000, + 0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000, + 0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000, + 0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000, + 0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000, + 0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000, + 0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000, + 0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000, + 0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000, + 0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000, + 0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000, + 0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000, + 0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000, + 0xa1d67c9100000000}, + {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000, + 0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000, + 0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000, + 0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000, + 0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000, + 0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000, + 0xfb3df66700000000, 0xa3bd14b000000000, 0xe8362afa00000000, + 0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000, + 0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000, + 0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000, + 0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000, + 0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000, + 0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000, + 0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000, + 0x606d915b00000000, 0x38ed738c00000000, 0x73664dc600000000, + 0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000, + 0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000, + 0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000, + 0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000, + 0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000, + 0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000, + 0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000, + 0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000, + 0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000, + 0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000, + 0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000, + 0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000, + 0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000, + 0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000, + 0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000, + 0x56cc5f2300000000, 0x0e4cbdf400000000, 0x45c783be00000000, + 0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000, + 0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000, + 0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000, + 0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000, + 0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000, + 0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000, + 0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000, + 0xc7a77a0c00000000, 0x9f2798db00000000, 0xd4aca69100000000, + 0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000, + 0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000, + 0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000, + 0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000, + 0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000, + 0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000, + 0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000, + 0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000, + 0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000, + 0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000, + 0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000, + 0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000, + 0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000, + 0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000, + 0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000, + 0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000, + 0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000, + 0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000, + 0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000, + 0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000, + 0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000, + 0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000, + 0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000, + 0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000, + 0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000, + 0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000, + 0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000, + 0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000, + 0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000, + 0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000, + 0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000, + 0x8309efb000000000, 0xdb890d6700000000, 0x9002332d00000000, + 0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000, + 0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000, + 0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000, + 0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000, + 0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000, + 0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000, + 0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000, + 0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000, + 0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000, + 0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000, + 0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000, + 0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000, + 0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000, + 0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000, + 0xa8ef40a100000000}, + {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000, + 0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000, + 0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000, + 0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000, + 0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000, + 0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000, + 0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000, + 0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000, + 0x977d1d7d00000000, 0xd1467a1800000000, 0x1b0bd3b700000000, + 0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000, + 0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000, + 0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000, + 0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000, + 0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000, + 0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000, + 0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000, + 0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000, + 0xe4b6935500000000, 0x7710d7b400000000, 0x312bb0d100000000, + 0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000, + 0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000, + 0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000, + 0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000, + 0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000, + 0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000, + 0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000, + 0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000, + 0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000, + 0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000, + 0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000, + 0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000, + 0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000, + 0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000, + 0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000, + 0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000, + 0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000, + 0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000, + 0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000, + 0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000, + 0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000, + 0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000, + 0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000, + 0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000, + 0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000, + 0x6a7b960e00000000, 0xa0363fa100000000, 0xe60d58c400000000, + 0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000, + 0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000, + 0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000, + 0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000, + 0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000, + 0xc3a69e2400000000, 0x5000dac500000000, 0x163bbda000000000, + 0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000, + 0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000, + 0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000, + 0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000, + 0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000, + 0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000, + 0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000, + 0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000, + 0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000, + 0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000, + 0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000, + 0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000, + 0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000, + 0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000, + 0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000, + 0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000, + 0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000, + 0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000, + 0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000, + 0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000, + 0x933d017400000000, 0xd506661100000000, 0x46a022f000000000, + 0x009b459500000000, 0xcad6ec3a00000000, 0x8ced8b5f00000000, + 0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000, + 0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000, + 0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000, + 0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000, + 0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000, + 0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000, + 0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000, + 0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000, + 0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000, + 0xde569a0b00000000, 0x4df0deea00000000, 0x0bcbb98f00000000, + 0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000, + 0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000, + 0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000, + 0x356bacd800000000}, + {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000, + 0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000, + 0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000, + 0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000, + 0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000, + 0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000, + 0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000, + 0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000, + 0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000, + 0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000, + 0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000, + 0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000, + 0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000, + 0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000, + 0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000, + 0x8bab193f00000000, 0x6829074400000000, 0xf6aadddb00000000, + 0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000, + 0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000, + 0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000, + 0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000, + 0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000, + 0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000, + 0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000, + 0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000, + 0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000, + 0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000, + 0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000, + 0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000, + 0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000, + 0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000, + 0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000, + 0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000, + 0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000, + 0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000, + 0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000, + 0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000, + 0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000, + 0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000, + 0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000, + 0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000, + 0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000, + 0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000, + 0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000, + 0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000, + 0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000, + 0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000, + 0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000, + 0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000, + 0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000, + 0xac0d490200000000, 0xf48bae6b00000000, 0x6a0874f400000000, + 0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000, + 0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000, + 0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000, + 0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000, + 0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000, + 0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000, + 0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000, + 0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000, + 0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000, + 0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000, + 0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000, + 0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000, + 0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000, + 0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000, + 0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000, + 0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000, + 0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000, + 0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000, + 0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000, + 0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000, + 0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000, + 0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000, + 0xc8cb44a800000000, 0x56489e3700000000, 0xb5ca804c00000000, + 0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000, + 0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000, + 0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000, + 0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000, + 0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000, + 0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000, + 0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000, + 0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000, + 0x8565606100000000, 0xdde3870800000000, 0x43605d9700000000, + 0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000, + 0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000, + 0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000, + 0x48686b5600000000}, + {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000, + 0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000, + 0x805af17200000000, 0x403ed96500000000, 0x002643b900000000, + 0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000, + 0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000, + 0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000, + 0xc183a78700000000, 0x01e78f9000000000, 0x41d956f500000000, + 0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000, + 0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000, + 0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000, + 0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000, + 0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000, + 0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000, + 0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000, + 0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000, + 0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000, + 0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000, + 0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000, + 0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000, + 0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000, + 0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000, + 0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000, + 0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000, + 0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000, + 0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000, + 0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000, + 0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000, + 0x460d917400000000, 0x06a0e94d00000000, 0xc6c4c15a00000000, + 0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000, + 0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000, + 0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000, + 0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000, + 0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000, + 0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000, + 0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000, + 0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000, + 0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000, + 0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000, + 0xc53276c500000000, 0x05565ed200000000, 0x456887b700000000, + 0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000, + 0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000, + 0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000, + 0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000, + 0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000, + 0xcfd24c1300000000, 0x0fb6640400000000, 0x4f1b1c3d00000000, + 0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000, + 0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000, + 0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000, + 0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000, + 0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000, + 0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000, + 0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000, + 0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000, + 0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000, + 0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000, + 0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000, + 0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000, + 0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000, + 0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000, + 0x8dfdad7900000000, 0xcd50d54000000000, 0x0d34fd5700000000, + 0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000, + 0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000, + 0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000, + 0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000, + 0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000, + 0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000, + 0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000, + 0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000, + 0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000, + 0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000, + 0xc9e1040200000000, 0x09852c1500000000, 0x49bbf57000000000, + 0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000, + 0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000, + 0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000, + 0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000, + 0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000, + 0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000, + 0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000, + 0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000, + 0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000, + 0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000, + 0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000, + 0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000, + 0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000, + 0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000, + 0xcaa2517800000000}, + {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000, + 0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000, + 0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000, + 0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000, + 0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000, + 0x88db007600000000, 0x4c86720100000000, 0x986c019a00000000, + 0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000, + 0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000, + 0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000, + 0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000, + 0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000, + 0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000, + 0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000, + 0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000, + 0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000, + 0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000, + 0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000, + 0xe9b3727500000000, 0x472bcb0300000000, 0x93c1b89800000000, + 0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000, + 0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000, + 0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000, + 0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000, + 0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000, + 0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000, + 0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000, + 0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000, + 0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000, + 0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000, + 0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000, + 0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000, + 0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000, + 0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000, + 0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000, + 0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000, + 0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000, + 0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000, + 0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000, + 0x20ce2f7100000000, 0xe4935d0600000000, 0x30792e9d00000000, + 0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000, + 0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000, + 0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000, + 0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000, + 0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000, + 0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000, + 0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000, + 0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000, + 0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000, + 0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000, + 0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000, + 0x118d037c00000000, 0xbf15ba0a00000000, 0x6bffc99100000000, + 0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000, + 0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000, + 0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000, + 0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000, + 0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000, + 0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000, + 0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000, + 0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000, + 0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000, + 0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000, + 0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000, + 0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000, + 0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000, + 0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000, + 0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000, + 0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000, + 0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000, + 0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000, + 0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000, + 0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000, + 0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000, + 0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000, + 0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000, + 0x07d7707900000000, 0xa94fc90f00000000, 0x7da5ba9400000000, + 0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000, + 0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000, + 0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000, + 0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000, + 0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000, + 0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000, + 0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000, + 0xb9982c7b00000000, 0x1700950d00000000, 0xc3eae69600000000, + 0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000, + 0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000, + 0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000, + 0x0c7ac97b00000000}, + {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000, + 0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000, + 0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000, + 0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000, + 0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000, + 0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000, + 0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000, + 0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000, + 0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000, + 0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000, + 0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000, + 0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000, + 0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000, + 0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000, + 0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000, + 0xbc39f8c400000000, 0x9490e69c00000000, 0xb3f5c31d00000000, + 0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000, + 0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000, + 0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000, + 0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000, + 0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000, + 0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000, + 0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000, + 0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000, + 0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000, + 0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000, + 0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000, + 0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000, + 0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000, + 0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000, + 0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000, + 0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000, + 0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000, + 0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000, + 0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000, + 0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000, + 0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000, + 0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000, + 0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000, + 0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000, + 0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000, + 0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000, + 0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000, + 0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000, + 0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000, + 0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000, + 0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000, + 0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000, + 0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000, + 0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000, + 0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000, + 0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000, + 0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000, + 0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000, + 0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000, + 0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000, + 0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000, + 0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000, + 0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000, + 0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000, + 0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000, + 0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000, + 0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000, + 0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000, + 0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000, + 0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000, + 0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000, + 0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000, + 0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000, + 0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000, + 0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000, + 0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000, + 0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000, + 0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000, + 0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000, + 0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000, + 0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000, + 0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000, + 0x1401859e00000000, 0x3364a01f00000000, 0x4453b82e00000000, + 0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000, + 0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000, + 0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000, + 0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000, + 0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000, + 0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000, + 0x5185cd0900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d, + 0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac, + 0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8, + 0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95, + 0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817, + 0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d, + 0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac, + 0x66be7cbb, 0xafd31ddf, 0xecc766c8, 0x29fbebf1, 0x6aef90e6, + 0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564, + 0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39, + 0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d, + 0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac, + 0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de, + 0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594, + 0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b, + 0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01, + 0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f, + 0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de, + 0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba, + 0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65, + 0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7, + 0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad, + 0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de, + 0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294, + 0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716, + 0x8224ec01, 0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71, + 0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15, + 0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4, + 0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca, + 0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280, + 0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f, + 0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15, + 0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9, + 0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748, + 0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c, + 0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971, + 0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3, + 0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9, + 0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 0x0b17bcaf, 0xce2b3196, + 0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc, + 0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e, + 0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03, + 0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67, + 0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296, + 0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a, + 0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170, + 0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af, + 0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5, + 0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb, + 0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a, + 0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e, + 0x4b0c4f49}, + {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09, + 0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc, + 0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e, + 0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc, + 0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934, + 0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2, + 0x5bb16739, 0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b, + 0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad, + 0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155, + 0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187, + 0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65, + 0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390, + 0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e, + 0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378, + 0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889, + 0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f, + 0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0, + 0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145, + 0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7, + 0xc5dd1f48, 0x7004e7d1, 0x4e6f253e, 0x4dd412d5, 0x73bfd03a, + 0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2, + 0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924, + 0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2, + 0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514, + 0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec, + 0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709, + 0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb, + 0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e, + 0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1, + 0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227, + 0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6, + 0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030, + 0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0, + 0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55, + 0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7, + 0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165, + 0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d, + 0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b, + 0x8abb3f90, 0xb4d0fd7f, 0xa10ebf78, 0x9f657d97, 0x9cde4a7c, + 0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a, + 0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362, + 0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0, + 0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52, + 0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7, + 0xc76f2d45, 0xf904efaa, 0xfabfd841, 0xc4d41aae, 0x710de237, + 0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1, + 0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020, + 0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6, + 0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719, + 0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec, + 0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e, + 0x14d747e1}, + {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0, + 0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b, + 0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652, + 0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437, + 0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514, + 0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265, + 0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de, + 0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af, + 0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c, + 0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9, + 0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0, + 0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b, + 0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6, + 0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7, + 0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734, + 0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045, + 0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8, + 0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303, + 0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a, + 0xe22946eb, 0x3896d450, 0xf8495a91, 0xf92fb808, 0x39f036c9, + 0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea, + 0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b, + 0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6, + 0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7, + 0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4, + 0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6, + 0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f, + 0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054, + 0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9, + 0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8, + 0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b, + 0x107636fa, 0x93630dd3, 0x53bc8312, 0x52da618b, 0x9205ef4a, + 0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441, + 0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a, + 0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3, + 0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6, + 0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5, + 0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94, + 0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9, + 0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288, + 0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab, + 0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce, + 0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7, + 0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c, + 0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527, + 0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256, + 0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5, + 0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4, + 0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39, + 0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2, + 0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 0x6b2a5742, 0x6a4cb5db, + 0xaa933b1a}, + {0x00000000, 0x6f4ca59b, 0x9f9e3bec, 0xf0d29e77, 0x7f3b0603, + 0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d, + 0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9, + 0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b, + 0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a, + 0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792, + 0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4, + 0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c, + 0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d, + 0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f, + 0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb, + 0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 0x0a92bb65, + 0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330, + 0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8, + 0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da, + 0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742, + 0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f, + 0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1, + 0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5, + 0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f, + 0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e, + 0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6, + 0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8, + 0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250, + 0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021, + 0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb, + 0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f, + 0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511, + 0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c, + 0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4, + 0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886, + 0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e, + 0x18b4f678, 0x77f853e3, 0x872acd94, 0xe866680f, 0x678ff07b, + 0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5, + 0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791, + 0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003, + 0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272, + 0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea, + 0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc, + 0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24, + 0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55, + 0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7, + 0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3, + 0x63dc2428, 0x1243b05a, 0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d, + 0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548, + 0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0, + 0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2, + 0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a, + 0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47, + 0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9, + 0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad, + 0x65711936}}; + +#endif + +#endif + +#if N == 4 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a, + 0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe, + 0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b, + 0x5b8126c1, 0x1d596ee9, 0xec836b43, 0x259c63fc, 0xd4466656, + 0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd, + 0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d, + 0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7, + 0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47, + 0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac, + 0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691, + 0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404, + 0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 0x0de08fd0, + 0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4, + 0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424, + 0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5, + 0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65, + 0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67, + 0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3, + 0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626, + 0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9, + 0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222, + 0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 0x2c934da2, + 0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a, + 0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a, + 0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1, + 0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2, + 0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077, + 0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3, + 0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1, + 0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621, + 0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0, + 0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60, + 0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0, + 0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64, + 0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1, + 0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc, + 0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027, + 0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7, + 0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9, + 0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79, + 0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292, + 0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af, + 0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a, + 0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee, + 0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e, + 0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe, + 0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f, + 0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff, + 0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd, + 0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29, + 0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc, + 0xe3c45916}, + {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344, + 0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59, + 0x6af0972a, 0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e, + 0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463, + 0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98, + 0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d, + 0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3, + 0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656, + 0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad, + 0x45cb117c, 0x56e3280f, 0x5804c0de, 0xe9f29171, 0xe71579a0, + 0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397, + 0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a, + 0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2, + 0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357, + 0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8, + 0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d, + 0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696, + 0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b, + 0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc, + 0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0, + 0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b, + 0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be, + 0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811, + 0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384, + 0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f, + 0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955, + 0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362, + 0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f, + 0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94, + 0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701, + 0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe, + 0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b, + 0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1, + 0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 0x6064c3bc, + 0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b, + 0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986, + 0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d, + 0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8, + 0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4, + 0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371, + 0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a, + 0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87, + 0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0, + 0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad, + 0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527, + 0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2, + 0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d, + 0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998, + 0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73, + 0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e, + 0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59, + 0xa7520488}, + {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20, + 0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09, + 0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431, + 0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a, + 0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203, + 0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b, + 0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14, + 0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c, + 0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25, + 0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e, + 0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36, + 0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f, + 0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649, + 0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961, + 0x9183bc79, 0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58, + 0x6416cd50, 0xd1346468, 0xed548760, 0xa9f5a278, 0x95954170, + 0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b, + 0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742, + 0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a, + 0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55, + 0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c, + 0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64, + 0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f, + 0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77, + 0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e, + 0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a, + 0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2, + 0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b, + 0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090, + 0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8, + 0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881, + 0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9, + 0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6, + 0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f, + 0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7, + 0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c, + 0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695, + 0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd, + 0x1183fca5, 0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb, + 0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3, + 0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa, + 0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1, + 0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9, + 0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0, + 0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df, + 0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 0x05790df7, + 0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace, + 0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6, + 0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd, + 0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4, + 0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec, + 0x3522e9e4}, + {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1, + 0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86, + 0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b, + 0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669, + 0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7, + 0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352, + 0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03, + 0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6, + 0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38, + 0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a, + 0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7, + 0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80, + 0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7, + 0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522, + 0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d, + 0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8, + 0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103, + 0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54, + 0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9, + 0xacc37fcd, 0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0, + 0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e, + 0x6d8a3e1a, 0xc8825576, 0xab858c52, 0x59e385ef, 0x3ae45ccb, + 0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1, + 0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624, + 0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea, + 0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a, + 0x2b6dcfe6, 0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37, + 0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360, + 0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab, + 0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e, + 0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741, + 0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4, + 0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334, + 0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63, + 0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de, + 0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c, + 0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942, + 0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7, + 0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131, + 0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4, + 0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a, + 0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758, + 0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5, + 0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2, + 0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32, + 0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7, + 0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8, + 0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d, + 0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6, + 0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1, + 0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c, + 0x97411e28}, + {0x00000000, 0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474, + 0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5, + 0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6, + 0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7, + 0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938, + 0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051, + 0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 0x370a5ebd, 0x35d4599a, + 0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3, + 0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c, + 0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d, + 0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e, + 0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf, + 0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740, + 0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29, + 0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592, + 0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb, + 0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4, + 0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365, + 0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036, + 0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7, + 0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08, + 0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561, + 0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a, + 0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663, + 0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac, + 0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d, + 0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce, + 0xdef77dd3, 0xd7516668, 0xd6e49b75, 0xd43a9c52, 0xd58f614f, + 0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50, + 0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639, + 0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82, + 0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb, + 0xec412d20, 0xedf4d03d, 0xef2ad71a, 0xee9f2a07, 0xea96d954, + 0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5, + 0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86, + 0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7, + 0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418, + 0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71, + 0xff0d1a56, 0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa, + 0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93, + 0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c, + 0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d, + 0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e, + 0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df, + 0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60, + 0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309, + 0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2, + 0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db, + 0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4, + 0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45, + 0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16, + 0x93c7a00b}, + {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45, + 0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb, + 0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d, + 0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696, + 0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf, + 0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb, + 0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028, + 0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c, + 0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65, + 0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be, + 0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038, + 0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6, + 0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15, + 0xcc2fb054, 0x7fbb9d97, 0x113786d6, 0xc3e2c050, 0xad6edb11, + 0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d, + 0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19, + 0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05, + 0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b, + 0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 0x3198a6ce, 0x820c8b0d, + 0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c, + 0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35, + 0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31, + 0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068, + 0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c, + 0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25, + 0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a, + 0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac, + 0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22, + 0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e, + 0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a, + 0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36, + 0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32, + 0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84, + 0x2d1276c5, 0x9e865b06, 0xf00a4047, 0xe05dd04b, 0x8ed1cb0a, + 0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c, + 0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057, + 0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e, + 0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a, + 0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc, + 0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8, + 0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1, + 0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a, + 0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec, + 0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62, + 0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4, + 0xeef0b695, 0x5d649b56, 0x33e88017, 0xe13dc691, 0x8fb1ddd0, + 0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc, + 0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8, + 0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4, + 0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a, + 0xc192e689, 0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc, + 0xce5f968d}, + {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de, + 0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b, + 0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d, + 0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680, + 0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4, + 0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d, + 0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde, + 0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97, + 0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3, + 0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e, + 0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678, + 0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d, + 0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723, + 0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a, + 0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0, + 0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9, + 0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85, + 0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770, + 0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56, + 0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a, + 0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e, + 0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67, + 0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785, + 0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc, + 0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788, + 0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90, + 0x478f8368, 0xa09225ff, 0x78515ed9, 0x9f4cf84e, 0x6d1b15b6, + 0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843, + 0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f, + 0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336, + 0xc7494ece, 0x2054e859, 0xadbebec3, 0x4aa31854, 0xb8f4f5ac, + 0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5, + 0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68, + 0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d, + 0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb, + 0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36, + 0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72, + 0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b, + 0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b, + 0x644842dc, 0xbc8b39fa, 0x5b969f6d, 0xa9c17295, 0x4edcd402, + 0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446, + 0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb, + 0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed, + 0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418, + 0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95, + 0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc, + 0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946, + 0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f, + 0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233, + 0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6, + 0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0, + 0x3e721277}, + {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb, + 0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9, + 0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11, + 0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d, + 0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9, + 0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c, + 0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881, + 0x85c3f88f, 0x4b930427, 0xee189429, 0xdbf5227a, 0x7e7eb274, + 0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790, + 0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc, + 0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514, + 0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56, + 0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9, + 0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c, + 0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13, + 0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6, + 0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c, + 0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e, + 0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386, + 0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376, + 0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692, + 0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67, + 0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416, + 0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3, + 0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07, + 0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd, + 0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15, + 0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457, + 0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd, + 0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28, + 0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337, + 0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2, + 0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594, + 0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6, + 0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e, + 0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52, + 0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6, + 0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143, + 0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17, + 0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 0xd85756e2, + 0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306, + 0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a, + 0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 0x102657d1, 0x25cbe182, + 0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0, + 0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496, + 0x03a27498, 0x364fc2cb, 0x93c452c5, 0x5d94ae6d, 0xf81f3e63, + 0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c, + 0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89, + 0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903, + 0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041, + 0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9, + 0x1c65ace7}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000, + 0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000, + 0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000, + 0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000, + 0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000, + 0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000, + 0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000, + 0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000, + 0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000, + 0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000, + 0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000, + 0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000, + 0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000, + 0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000, + 0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000, + 0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000, + 0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000, + 0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000, + 0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000, + 0x0b6ab96100000000, 0x58dc545400000000, 0x564cdff100000000, + 0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000, + 0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000, + 0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000, + 0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000, + 0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000, + 0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000, + 0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000, + 0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000, + 0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000, + 0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000, + 0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000, + 0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000, + 0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000, + 0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000, + 0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000, + 0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000, + 0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000, + 0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000, + 0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000, + 0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000, + 0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000, + 0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000, + 0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000, + 0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000, + 0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000, + 0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000, + 0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000, + 0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000, + 0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000, + 0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000, + 0x7b7ae69e00000000, 0x75ea6d3b00000000, 0x6a8536d900000000, + 0x6415bd7c00000000, 0x37a3504900000000, 0x3933dbec00000000, + 0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000, + 0xc279661700000000, 0x6f7f041d00000000, 0x61ef8fb800000000, + 0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000, + 0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000, + 0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000, + 0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000, + 0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000, + 0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000, + 0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000, + 0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000, + 0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000, + 0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000, + 0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000, + 0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000, + 0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000, + 0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000, + 0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000, + 0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000, + 0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000, + 0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000, + 0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000, + 0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000, + 0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000, + 0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000, + 0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000, + 0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000, + 0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000, + 0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000, + 0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000, + 0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000, + 0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000, + 0x41c0be7700000000, 0x1276534200000000, 0x1ce6d8e700000000, + 0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000, + 0xe7ac651c00000000}, + {0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000, + 0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000, + 0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000, + 0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000, + 0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000, + 0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000, + 0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000, + 0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000, + 0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000, + 0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000, + 0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000, + 0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000, + 0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000, + 0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000, + 0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000, + 0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000, + 0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000, + 0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000, + 0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000, + 0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000, + 0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000, + 0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000, + 0x4c2c90d100000000, 0xdb8a8d3600000000, 0xfdf14eee00000000, + 0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000, + 0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000, + 0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000, + 0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000, + 0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000, + 0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000, + 0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000, + 0x885aeb2e00000000, 0x1ffcf6c900000000, 0x3987351100000000, + 0xae2128f600000000, 0x56cc7f0400000000, 0xc16a62e300000000, + 0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000, + 0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000, + 0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000, + 0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000, + 0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000, + 0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000, + 0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000, + 0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000, + 0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000, + 0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000, + 0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000, + 0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000, + 0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000, + 0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000, + 0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000, + 0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000, + 0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000, + 0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000, + 0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000, + 0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000, + 0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000, + 0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000, + 0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000, + 0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000, + 0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000, + 0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000, + 0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000, + 0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000, + 0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000, + 0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000, + 0x1d45213700000000, 0x8ae33cd000000000, 0xac98ff0800000000, + 0x3b3ee2ef00000000, 0xc3d3b51d00000000, 0x5475a8fa00000000, + 0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000, + 0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000, + 0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000, + 0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000, + 0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000, + 0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000, + 0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000, + 0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000, + 0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000, + 0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000, + 0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000, + 0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000, + 0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000, + 0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000, + 0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000, + 0x60026a8100000000, 0x98ef3d7300000000, 0x0f49209400000000, + 0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000, + 0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000, + 0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000, + 0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000, + 0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000, + 0x7712723e00000000}, + {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000, + 0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000, + 0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000, + 0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000, + 0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000, + 0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000, + 0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000, + 0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000, + 0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000, + 0x1c50622f00000000, 0x9a16b7fd00000000, 0xdb0d3b9300000000, + 0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000, + 0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000, + 0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000, + 0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000, + 0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000, + 0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000, + 0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000, + 0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000, + 0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000, + 0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000, + 0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000, + 0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000, + 0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000, + 0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000, + 0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000, + 0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000, + 0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000, + 0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000, + 0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000, + 0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000, + 0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000, + 0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000, + 0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000, + 0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000, + 0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000, + 0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000, + 0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000, + 0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000, + 0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000, + 0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000, + 0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000, + 0xa3ad283100000000, 0x25ebfde300000000, 0x64f0718d00000000, + 0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000, + 0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000, + 0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000, + 0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000, + 0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000, + 0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000, + 0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000, + 0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000, + 0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000, + 0xf5364c6c00000000, 0x361bd8df00000000, 0x770054b100000000, + 0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000, + 0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000, + 0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000, + 0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000, + 0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000, + 0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000, + 0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000, + 0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000, + 0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000, + 0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000, + 0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000, + 0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000, + 0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000, + 0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000, + 0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000, + 0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000, + 0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000, + 0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000, + 0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000, + 0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000, + 0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000, + 0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000, + 0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000, + 0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000, + 0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000, + 0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000, + 0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000, + 0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000, + 0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000, + 0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000, + 0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000, + 0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000, + 0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000, + 0x8d965fce00000000}, + {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000, + 0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000, + 0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000, + 0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000, + 0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000, + 0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000, + 0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000, + 0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000, + 0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000, + 0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000, + 0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000, + 0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000, + 0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000, + 0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000, + 0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000, + 0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000, + 0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000, + 0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000, + 0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000, + 0x8567fa2100000000, 0xa260242300000000, 0xbf9d912200000000, + 0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000, + 0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000, + 0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000, + 0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000, + 0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000, + 0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000, + 0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000, + 0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000, + 0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000, + 0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000, + 0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000, + 0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000, + 0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000, + 0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000, + 0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000, + 0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000, + 0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000, + 0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000, + 0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000, + 0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000, + 0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000, + 0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000, + 0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000, + 0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000, + 0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000, + 0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000, + 0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000, + 0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000, + 0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000, + 0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000, + 0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000, + 0xa54abbcd00000000, 0x824d65cf00000000, 0x9fb0d0ce00000000, + 0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000, + 0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000, + 0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000, + 0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000, + 0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000, + 0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000, + 0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000, + 0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000, + 0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000, + 0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000, + 0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000, + 0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000, + 0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000, + 0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000, + 0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000, + 0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000, + 0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000, + 0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000, + 0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000, + 0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000, + 0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000, + 0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000, + 0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000, + 0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000, + 0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000, + 0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000, + 0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000, + 0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000, + 0xb0bb619a00000000, 0xad46d49b00000000, 0x8a410a9900000000, + 0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000, + 0xfeb5dd9f00000000, 0xe348689e00000000, 0x5853ce9700000000, + 0x45ae7b9600000000, 0x62a9a59400000000, 0x7f54109500000000, + 0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000, + 0x0ba0c79300000000}, + {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000, + 0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000, + 0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000, + 0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000, + 0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000, + 0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000, + 0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000, + 0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000, + 0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000, + 0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000, + 0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000, + 0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000, + 0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000, + 0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000, + 0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000, + 0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000, + 0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000, + 0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000, + 0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000, + 0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000, + 0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000, + 0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000, + 0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000, + 0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000, + 0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000, + 0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000, + 0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000, + 0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000, + 0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000, + 0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000, + 0x38c4aa9800000000, 0x1c1dadfb00000000, 0xa114cb0900000000, + 0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000, + 0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000, + 0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000, + 0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000, + 0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000, + 0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000, + 0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000, + 0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000, + 0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000, + 0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000, + 0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000, + 0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000, + 0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000, + 0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000, + 0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000, + 0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000, + 0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000, + 0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000, + 0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000, + 0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000, + 0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000, + 0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000, + 0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000, + 0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000, + 0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000, + 0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000, + 0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000, + 0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000, + 0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000, + 0x318e24ea00000000, 0x1557238900000000, 0x793c2b2c00000000, + 0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000, + 0x0a9b99d500000000, 0x2e429eb600000000, 0x934bf84400000000, + 0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000, + 0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000, + 0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000, + 0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000, + 0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000, + 0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000, + 0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000, + 0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000, + 0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000, + 0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000, + 0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000, + 0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000, + 0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000, + 0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000, + 0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000, + 0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000, + 0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000, + 0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000, + 0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000, + 0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000, + 0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000, + 0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000, + 0x281e419700000000}, + {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000, + 0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000, + 0x304a428900000000, 0x38a922b500000000, 0x011e763800000000, + 0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000, + 0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000, + 0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000, + 0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000, + 0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000, + 0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000, + 0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000, + 0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000, + 0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000, + 0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000, + 0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000, + 0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000, + 0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000, + 0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000, + 0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000, + 0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000, + 0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000, + 0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000, + 0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000, + 0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000, + 0x619922d500000000, 0x79bc839100000000, 0x715fe3ad00000000, + 0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000, + 0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000, + 0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000, + 0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000, + 0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000, + 0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000, + 0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000, + 0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000, + 0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000, + 0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000, + 0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000, + 0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000, + 0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000, + 0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000, + 0x5f7434f100000000, 0x579754cd00000000, 0x6f3e767800000000, + 0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000, + 0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000, + 0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000, + 0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000, + 0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000, + 0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000, + 0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000, + 0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000, + 0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000, + 0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000, + 0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000, + 0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000, + 0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000, + 0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000, + 0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000, + 0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000, + 0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000, + 0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000, + 0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000, + 0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000, + 0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000, + 0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000, + 0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000, + 0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000, + 0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000, + 0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000, + 0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000, + 0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000, + 0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000, + 0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000, + 0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000, + 0xc9e06f2100000000, 0xc1030f1d00000000, 0xf9aa2da800000000, + 0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000, + 0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000, + 0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000, + 0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000, + 0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000, + 0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000, + 0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000, + 0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000, + 0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000, + 0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000, + 0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000, + 0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000, + 0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000, + 0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000, + 0xe4e9223500000000}, + {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000, + 0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000, + 0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000, + 0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000, + 0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000, + 0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000, + 0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000, + 0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000, + 0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000, + 0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000, + 0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000, + 0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000, + 0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000, + 0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000, + 0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000, + 0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000, + 0x7191f2e900000000, 0xa07915e700000000, 0xd3403df400000000, + 0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000, + 0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000, + 0x283f2a9000000000, 0x5b06028300000000, 0x8aeee58d00000000, + 0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000, + 0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000, + 0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000, + 0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000, + 0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000, + 0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000, + 0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000, + 0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000, + 0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000, + 0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000, + 0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000, + 0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000, + 0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000, + 0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000, + 0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000, + 0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000, + 0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000, + 0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000, + 0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000, + 0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000, + 0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000, + 0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000, + 0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000, + 0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000, + 0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000, + 0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000, + 0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000, + 0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000, + 0x94fc4ef000000000, 0x4514a9fe00000000, 0x362d81ed00000000, + 0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000, + 0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000, + 0xcd52968900000000, 0xbe6bbe9a00000000, 0x6f83599400000000, + 0x5819eebc00000000, 0x89f109b200000000, 0xfac821a100000000, + 0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000, + 0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000, + 0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000, + 0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000, + 0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000, + 0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000, + 0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000, + 0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000, + 0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000, + 0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000, + 0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000, + 0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000, + 0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000, + 0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000, + 0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000, + 0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000, + 0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000, + 0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000, + 0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000, + 0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000, + 0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000, + 0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000, + 0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000, + 0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000, + 0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000, + 0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000, + 0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000, + 0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000, + 0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000, + 0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000, + 0x6e76028100000000, 0x1d4f2a9200000000, 0xcca7cd9c00000000, + 0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000, + 0x880452a700000000}, + {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000, + 0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000, + 0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000, + 0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000, + 0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000, + 0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000, + 0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000, + 0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000, + 0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000, + 0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000, + 0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000, + 0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000, + 0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000, + 0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000, + 0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000, + 0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000, + 0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000, + 0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000, + 0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000, + 0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000, + 0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000, + 0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000, + 0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000, + 0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000, + 0xf08f719600000000, 0x5a8aab6700000000, 0xe582b4ae00000000, + 0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000, + 0xcf983edf00000000, 0x659de42e00000000, 0x4dd53c6800000000, + 0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000, + 0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000, + 0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000, + 0x0cecedb300000000, 0xa6e9374200000000, 0x33fba2fa00000000, + 0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000, + 0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000, + 0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000, + 0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000, + 0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000, + 0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000, + 0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000, + 0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000, + 0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000, + 0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000, + 0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000, + 0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000, + 0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000, + 0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000, + 0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000, + 0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000, + 0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000, + 0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000, + 0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000, + 0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000, + 0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000, + 0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000, + 0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000, + 0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000, + 0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000, + 0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000, + 0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000, + 0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000, + 0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000, + 0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000, + 0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000, + 0x32fdf11600000000, 0x98f82be700000000, 0x0deabe5f00000000, + 0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000, + 0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000, + 0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000, + 0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000, + 0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000, + 0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000, + 0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000, + 0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000, + 0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000, + 0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000, + 0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000, + 0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000, + 0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000, + 0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000, + 0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000, + 0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000, + 0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000, + 0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000, + 0x68775a7100000000, 0xfd65cfc900000000, 0x5760153800000000, + 0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000, + 0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000, + 0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000, + 0x1659c4e300000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0, + 0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587, + 0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa, + 0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09, + 0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee, + 0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3, + 0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3, + 0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce, + 0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429, + 0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda, + 0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7, + 0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0, + 0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd, + 0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0, + 0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287, + 0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a, + 0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9, + 0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e, + 0x5751d533, 0xd28843ee, 0x8a8e3093, 0x0f57a64e, 0xc13b6cf3, + 0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3, + 0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054, + 0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49, + 0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da, + 0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7, + 0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20, + 0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d, + 0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00, + 0x4f2319dd, 0x7af669fa, 0xff2fff27, 0x3143359a, 0xb49aa347, + 0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14, + 0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209, + 0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e, + 0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33, + 0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3, + 0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194, + 0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9, + 0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a, + 0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd, + 0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0, + 0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d, + 0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460, + 0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87, + 0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674, + 0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509, + 0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e, + 0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae, + 0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3, + 0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694, + 0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989, + 0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da, + 0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d, + 0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0, + 0xa68cee3d}, + {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19, + 0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae, + 0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb, + 0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a, + 0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55, + 0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1, + 0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c, + 0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8, + 0xee663dfe, 0x98873263, 0x43a2531f, 0x35435c82, 0xf5e991e7, + 0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936, + 0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453, + 0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4, + 0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941, + 0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5, + 0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93, + 0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17, + 0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e, + 0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89, + 0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec, + 0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0, + 0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf, + 0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b, + 0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b, + 0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f, + 0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0, + 0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e, + 0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b, + 0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc, + 0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5, + 0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261, + 0x3898031d, 0x4e790c80, 0xb8cd98d6, 0xce2c974b, 0x1509f637, + 0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3, + 0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57, + 0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0, + 0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85, + 0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454, + 0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b, + 0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f, + 0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423, + 0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7, + 0x250693f1, 0x53e79c6c, 0x88c2fd10, 0xfe23f28d, 0x3e893fe8, + 0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739, + 0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c, + 0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb, + 0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f, + 0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b, + 0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd, + 0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59, + 0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070, + 0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7, + 0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2, + 0x51e8883f}, + {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a, + 0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276, + 0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed, + 0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55, + 0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b, + 0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8, + 0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320, + 0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413, + 0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd, + 0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75, + 0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 0x23755dee, + 0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312, + 0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca, + 0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9, + 0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad, + 0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e, + 0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504, + 0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8, + 0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63, + 0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353, + 0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d, + 0x8c524c34, 0xf5383125, 0x1de3ca9c, 0x07eccb07, 0xef3730be, + 0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae, + 0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d, + 0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943, + 0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7, + 0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c, + 0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390, + 0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a, + 0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239, + 0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d, + 0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e, + 0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c, + 0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0, + 0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b, + 0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93, + 0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d, + 0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e, + 0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c, + 0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f, + 0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1, + 0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579, + 0x18a59868, 0xf07e63d1, 0xea71624a, 0x02aa99f3, 0x7bc0e4e2, + 0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e, + 0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c, + 0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f, + 0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b, + 0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158, + 0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2, + 0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e, + 0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5, + 0x8ae9531c}, + {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4, + 0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd, + 0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 0x59424a09, 0xabf68220, + 0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf, + 0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495, + 0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def, + 0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90, + 0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea, + 0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0, + 0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f, + 0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2, + 0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab, + 0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e, + 0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754, + 0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda, + 0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0, + 0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c, + 0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215, + 0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8, + 0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910, + 0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a, + 0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30, + 0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 0x7a96c658, + 0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22, + 0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478, + 0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2, + 0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f, + 0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606, + 0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba, + 0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0, + 0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e, + 0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034, + 0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f, + 0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996, + 0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 0x2260406b, + 0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84, + 0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de, + 0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4, + 0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5, + 0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f, + 0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5, + 0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a, + 0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7, + 0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce, + 0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65, + 0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f, + 0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91, + 0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb, + 0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57, + 0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e, + 0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3, + 0xd739710d}}; + +#endif + +#endif + +#if N == 5 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df, + 0x7fc5d198, 0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8, + 0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef, + 0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376, + 0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201, + 0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399, + 0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372, + 0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea, + 0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d, + 0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004, + 0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353, + 0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334, + 0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a, + 0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2, + 0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a, + 0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2, + 0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b, + 0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c, + 0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b, + 0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f, + 0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338, + 0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0, + 0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6, + 0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e, + 0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319, + 0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3, + 0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4, + 0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783, + 0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a, + 0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492, + 0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a, + 0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2, + 0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 0x28c24496, + 0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1, + 0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6, + 0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f, + 0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548, + 0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0, + 0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741, + 0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9, + 0xd5f18471, 0x7ab51636, 0x5009a6be, 0xff4d34f9, 0x0570c7ae, + 0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437, + 0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760, + 0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707, + 0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433, + 0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab, + 0xc8446523, 0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703, + 0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b, + 0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412, + 0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475, + 0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722, + 0xe9947565}, + {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5, + 0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22, + 0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c, + 0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed, + 0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d, + 0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1, + 0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e, + 0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32, + 0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142, + 0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93, + 0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d, + 0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a, + 0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58, + 0x8053b7f1, 0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14, + 0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81, + 0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd, + 0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab, + 0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c, + 0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72, + 0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f, + 0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff, + 0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3, + 0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30, + 0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c, + 0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c, + 0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558, + 0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146, + 0x740a4aef, 0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581, + 0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7, + 0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab, + 0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e, + 0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272, + 0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838, + 0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff, + 0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1, + 0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330, + 0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840, + 0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c, + 0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb, + 0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7, + 0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7, + 0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616, + 0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208, + 0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf, + 0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 0xef739c85, + 0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9, + 0x9334a332, 0xddbda89b, 0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c, + 0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10, + 0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76, + 0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1, + 0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf, + 0xf7d05006}, + {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b, + 0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774, + 0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58, + 0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a, + 0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb, + 0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952, + 0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e, + 0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7, + 0x07751a0d, 0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746, + 0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14, + 0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338, + 0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907, + 0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777, + 0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de, + 0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064, + 0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd, + 0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951, + 0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e, + 0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42, + 0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b, + 0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a, + 0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3, + 0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904, + 0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad, + 0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c, + 0xdfa72ebe, 0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d, + 0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861, + 0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e, + 0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2, + 0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b, + 0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1, + 0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78, + 0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f, + 0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40, + 0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c, + 0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e, + 0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf, + 0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166, + 0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d, + 0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4, + 0xa1f9b54e, 0x2c7148ac, 0x619948cb, 0xec11b529, 0xfa494805, + 0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157, + 0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b, + 0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644, + 0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43, + 0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea, + 0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850, + 0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9, + 0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165, + 0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a, + 0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676, + 0xb2075b94}, + {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf, + 0xeea341a0, 0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61, + 0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be, + 0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd, + 0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3, + 0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 0xd0c9a77c, 0x5039b063, + 0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105, + 0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5, + 0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb, + 0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8, + 0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07, + 0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9, + 0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5, + 0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515, + 0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4, + 0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014, + 0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7, + 0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269, + 0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6, + 0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af, + 0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1, + 0xcbae77ae, 0x91cf48ce, 0x113f5fd1, 0x250d360e, 0xa5fd2111, + 0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d, + 0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad, + 0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3, + 0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75, + 0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa, + 0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74, + 0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7, + 0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477, + 0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6, + 0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176, + 0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af, + 0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71, + 0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae, + 0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd, + 0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3, + 0x5d12b6cc, 0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073, + 0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0, + 0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400, + 0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e, + 0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d, + 0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2, + 0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c, + 0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5, + 0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505, + 0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4, + 0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004, + 0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7, + 0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279, + 0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6, + 0xba50bcb9}, + {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897, + 0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb, + 0x81301e45, 0x1440cad0, 0x48f0e9b9, 0xdd803d2c, 0xb96046d2, + 0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2, + 0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372, + 0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70, + 0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92, + 0x7d13b907, 0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190, + 0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40, + 0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430, + 0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759, + 0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75, + 0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2, + 0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0, + 0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7, + 0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5, + 0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39, + 0x7f866bac, 0x1b661052, 0x8e16c4c7, 0xa2065680, 0x37768215, + 0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c, + 0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5, + 0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625, + 0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27, + 0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c, + 0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e, + 0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee, + 0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71, + 0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18, + 0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134, + 0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8, + 0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba, + 0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd, + 0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff, + 0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a, + 0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6, + 0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 0xa3bcf431, 0xc75c8fcf, + 0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf, + 0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f, + 0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d, + 0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d, + 0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f, + 0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af, + 0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df, + 0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6, + 0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a, + 0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef, + 0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed, + 0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa, + 0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8, + 0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624, + 0x01baa2b1, 0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08, + 0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861, + 0x808abcf4}, + {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2, + 0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd, + 0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76, + 0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52, + 0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e, + 0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124, + 0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147, + 0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d, + 0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31, + 0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15, + 0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae, + 0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1, + 0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d, + 0x2b0600a5, 0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307, + 0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9, + 0xa50e8871, 0xa647e5ca, 0x69d9f202, 0xe20acc1b, 0x2d94dbd3, + 0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084, + 0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb, + 0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850, + 0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2, + 0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe, + 0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94, + 0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261, + 0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b, + 0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917, + 0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53, + 0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8, + 0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787, + 0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0, + 0x92257b18, 0x19f64501, 0xd66852c9, 0xd5213f72, 0x1abf28ba, + 0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404, + 0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e, + 0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af, + 0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0, + 0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b, + 0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f, + 0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543, + 0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129, + 0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627, + 0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d, + 0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51, + 0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75, + 0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce, + 0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1, + 0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760, + 0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a, + 0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4, + 0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 0xa0217bde, + 0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089, + 0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6, + 0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d, + 0xefdb3f95}, + {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8, + 0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7, + 0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945, + 0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9, + 0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652, + 0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc, + 0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a, + 0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4, + 0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f, + 0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3, + 0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51, + 0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e, + 0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c, + 0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362, + 0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11, + 0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff, + 0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7, + 0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8, + 0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a, + 0xb382475c, 0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690, + 0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b, + 0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5, + 0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05, + 0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb, + 0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740, + 0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f, + 0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded, + 0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2, + 0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 0xd377d2fa, + 0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714, + 0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67, + 0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89, + 0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7, + 0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8, + 0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a, + 0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6, + 0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d, + 0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3, + 0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9, + 0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57, + 0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc, + 0x29f137ca, 0x4477c490, 0x60f595a6, 0x45afb476, 0x612de540, + 0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2, + 0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd, + 0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93, + 0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d, + 0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e, + 0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0, + 0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8, + 0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7, + 0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75, + 0x0e2fbf43}, + {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc, + 0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a, + 0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3, + 0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7, + 0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b, + 0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154, + 0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3, + 0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc, + 0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330, + 0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 0x87de9264, + 0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd, + 0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b, + 0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a, + 0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175, + 0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275, + 0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a, + 0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234, + 0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2, + 0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b, + 0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a, + 0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6, + 0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189, + 0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b, + 0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204, + 0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8, + 0xe13f523b, 0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226, + 0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff, + 0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219, + 0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167, + 0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258, + 0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158, + 0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267, + 0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c, + 0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da, + 0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003, + 0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157, + 0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b, + 0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4, + 0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179, + 0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246, + 0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a, + 0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de, + 0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107, + 0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1, + 0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba, + 0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285, + 0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185, + 0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba, + 0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4, + 0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322, + 0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb, + 0xf4377108}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000, + 0x15b1175b00000000, 0xcc43cadb00000000, 0x3fd338ed00000000, + 0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000, + 0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000, + 0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000, + 0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000, + 0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000, + 0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000, + 0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000, + 0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000, + 0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000, + 0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000, + 0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000, + 0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000, + 0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000, + 0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000, + 0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000, + 0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000, + 0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000, + 0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000, + 0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000, + 0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000, + 0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000, + 0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000, + 0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000, + 0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000, + 0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000, + 0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000, + 0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000, + 0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000, + 0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000, + 0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000, + 0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000, + 0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000, + 0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000, + 0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000, + 0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000, + 0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000, + 0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000, + 0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000, + 0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000, + 0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000, + 0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000, + 0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000, + 0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000, + 0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000, + 0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000, + 0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000, + 0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000, + 0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000, + 0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000, + 0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000, + 0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000, + 0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000, + 0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000, + 0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000, + 0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000, + 0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000, + 0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000, + 0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000, + 0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000, + 0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000, + 0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000, + 0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000, + 0x9f0023a900000000, 0x6c90d19f00000000, 0x7921c6c400000000, + 0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000, + 0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000, + 0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000, + 0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000, + 0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000, + 0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000, + 0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000, + 0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000, + 0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000, + 0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000, + 0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000, + 0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000, + 0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000, + 0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000, + 0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000, + 0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000, + 0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000, + 0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000, + 0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000, + 0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000, + 0x087137f400000000}, + {0x0000000000000000, 0x3651822400000000, 0x6ca2044900000000, + 0x5af3866d00000000, 0xd844099200000000, 0xee158bb600000000, + 0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000, + 0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000, + 0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000, + 0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000, + 0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000, + 0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000, + 0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000, + 0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000, + 0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000, + 0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000, + 0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000, + 0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000, + 0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000, + 0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000, + 0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000, + 0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000, + 0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000, + 0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000, + 0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000, + 0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000, + 0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000, + 0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000, + 0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000, + 0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000, + 0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000, + 0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000, + 0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000, + 0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000, + 0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000, + 0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000, + 0xca55b4dd00000000, 0xfc0436f900000000, 0xa6f7b09400000000, + 0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000, + 0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000, + 0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000, + 0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000, + 0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000, + 0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000, + 0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000, + 0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000, + 0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000, + 0x2c256cdc00000000, 0x1a74eef800000000, 0x59cbc1f600000000, + 0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000, + 0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000, + 0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000, + 0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000, + 0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000, + 0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000, + 0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000, + 0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000, + 0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000, + 0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000, + 0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000, + 0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000, + 0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000, + 0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000, + 0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000, + 0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000, + 0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000, + 0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000, + 0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000, + 0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000, + 0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000, + 0xd5ad196000000000, 0xe3fc9b4400000000, 0xb90f1d2900000000, + 0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000, + 0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000, + 0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000, + 0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000, + 0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000, + 0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000, + 0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000, + 0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000, + 0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000, + 0x33ddc16100000000, 0x058c434500000000, 0x939e752b00000000, + 0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000, + 0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000, + 0x1129fad400000000, 0x621116d400000000, 0x544094f000000000, + 0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000, + 0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000, + 0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000, + 0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000, + 0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000, + 0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000, + 0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000, + 0x43bf2f0e00000000}, + {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000, + 0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000, + 0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000, + 0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000, + 0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000, + 0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000, + 0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000, + 0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000, + 0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000, + 0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000, + 0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000, + 0x5e9f6a5000000000, 0x47a1b9db00000000, 0x8fb6271400000000, + 0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000, + 0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000, + 0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000, + 0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000, + 0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000, + 0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000, + 0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000, + 0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000, + 0x7a3ed19200000000, 0xb2294f5d00000000, 0xab179cd600000000, + 0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000, + 0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000, + 0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000, + 0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000, + 0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000, + 0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000, + 0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000, + 0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000, + 0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000, + 0xf2db6a2500000000, 0x3accf4ea00000000, 0x81a1bde900000000, + 0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000, + 0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000, + 0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000, + 0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000, + 0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000, + 0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000, + 0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000, + 0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000, + 0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000, + 0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000, + 0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000, + 0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000, + 0x533fdfdd00000000, 0x4a010c5600000000, 0x8216929900000000, + 0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000, + 0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000, + 0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000, + 0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000, + 0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000, + 0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000, + 0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000, + 0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000, + 0x779e641f00000000, 0xbf89fad000000000, 0xa6b7295b00000000, + 0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000, + 0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000, + 0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000, + 0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000, + 0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000, + 0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000, + 0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000, + 0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000, + 0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000, + 0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000, + 0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000, + 0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000, + 0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000, + 0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000, + 0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000, + 0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000, + 0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000, + 0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000, + 0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000, + 0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000, + 0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000, + 0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000, + 0xa8a0b3a600000000, 0xb19e602d00000000, 0x7989fee200000000, + 0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000, + 0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000, + 0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000, + 0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000, + 0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000, + 0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000, + 0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000, + 0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000, + 0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000, + 0x953fdbef00000000}, + {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000, + 0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000, + 0xfcf7c0c900000000, 0x6923b05c00000000, 0x2eb1a07000000000, + 0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000, + 0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000, + 0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000, + 0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000, + 0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000, + 0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000, + 0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000, + 0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000, + 0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000, + 0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000, + 0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000, + 0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000, + 0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000, + 0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000, + 0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000, + 0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000, + 0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000, + 0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000, + 0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000, + 0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000, + 0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000, + 0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000, + 0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000, + 0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000, + 0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000, + 0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000, + 0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000, + 0xebf9965300000000, 0x7e2de6c600000000, 0x170e569a00000000, + 0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000, + 0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000, + 0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000, + 0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000, + 0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000, + 0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000, + 0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000, + 0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000, + 0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000, + 0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000, + 0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000, + 0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000, + 0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000, + 0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000, + 0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000, + 0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000, + 0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000, + 0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000, + 0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000, + 0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000, + 0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000, + 0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000, + 0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000, + 0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000, + 0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000, + 0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000, + 0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000, + 0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000, + 0xd47f0d0a00000000, 0x2a04ed6e00000000, 0xbfd09dfb00000000, + 0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000, + 0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000, + 0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000, + 0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000, + 0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000, + 0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000, + 0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000, + 0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000, + 0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000, + 0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000, + 0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000, + 0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000, + 0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000, + 0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000, + 0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000, + 0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000, + 0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000, + 0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000, + 0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000, + 0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000, + 0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000, + 0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000, + 0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000, + 0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000, + 0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000, + 0xf4bc8a8000000000}, + {0x0000000000000000, 0x1f17f08000000000, 0x7f2891da00000000, + 0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000, + 0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000, + 0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000, + 0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000, + 0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000, + 0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000, + 0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000, + 0xc3f19abe00000000, 0xdce66a3e00000000, 0xbcd90b6400000000, + 0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000, + 0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000, + 0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000, + 0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000, + 0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000, + 0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000, + 0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000, + 0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000, + 0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000, + 0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000, + 0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000, + 0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000, + 0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000, + 0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000, + 0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000, + 0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000, + 0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000, + 0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000, + 0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000, + 0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000, + 0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000, + 0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000, + 0xd6c4b90300000000, 0xb6fbd85900000000, 0xa9ec28d900000000, + 0xcfcdf89700000000, 0xd0da081700000000, 0xb0e5694d00000000, + 0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000, + 0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000, + 0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000, + 0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000, + 0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000, + 0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000, + 0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000, + 0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000, + 0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000, + 0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000, + 0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000, + 0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000, + 0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000, + 0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000, + 0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000, + 0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000, + 0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000, + 0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000, + 0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000, + 0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000, + 0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000, + 0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000, + 0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000, + 0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000, + 0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000, + 0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000, + 0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000, + 0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000, + 0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000, + 0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000, + 0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000, + 0xdf9d80f400000000, 0xc08a707400000000, 0xa0b5112e00000000, + 0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000, + 0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000, + 0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000, + 0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000, + 0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000, + 0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000, + 0xc2801f7800000000, 0xa2bf7e2200000000, 0xbda88ea200000000, + 0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000, + 0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000, + 0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000, + 0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000, + 0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000, + 0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000, + 0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000, + 0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000, + 0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000, + 0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000, + 0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000, + 0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000, + 0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000, + 0xb9bc50ba00000000}, + {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000, + 0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000, + 0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000, + 0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000, + 0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000, + 0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000, + 0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000, + 0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000, + 0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000, + 0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000, + 0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000, + 0x791d9c3d00000000, 0x1e1d747000000000, 0xfce0fcfd00000000, + 0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000, + 0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000, + 0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000, + 0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000, + 0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000, + 0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000, + 0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000, + 0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000, + 0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000, + 0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000, + 0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000, + 0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000, + 0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000, + 0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000, + 0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000, + 0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000, + 0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000, + 0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000, + 0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000, + 0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000, + 0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000, + 0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000, + 0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000, + 0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000, + 0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000, + 0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000, + 0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000, + 0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000, + 0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000, + 0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000, + 0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000, + 0x4d75482000000000, 0x2a75a06d00000000, 0xc88828e000000000, + 0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000, + 0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000, + 0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000, + 0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000, + 0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000, + 0xa57b9a5500000000, 0x8986c24300000000, 0x6b7b4ace00000000, + 0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000, + 0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000, + 0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000, + 0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000, + 0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000, + 0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000, + 0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000, + 0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000, + 0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000, + 0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000, + 0x1266d6f300000000, 0xf09b5e7e00000000, 0x979bb63300000000, + 0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000, + 0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000, + 0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000, + 0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000, + 0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000, + 0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000, + 0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000, + 0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000, + 0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000, + 0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000, + 0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000, + 0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000, + 0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000, + 0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000, + 0xa152042b00000000, 0xc652ec6600000000, 0x24af64eb00000000, + 0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000, + 0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000, + 0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000, + 0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000, + 0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000, + 0x495cd65e00000000, 0x65a18e4800000000, 0x875c06c500000000, + 0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000, + 0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000, + 0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000, + 0x945b07b200000000}, + {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000, + 0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000, + 0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000, + 0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000, + 0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000, + 0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000, + 0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000, + 0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000, + 0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000, + 0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000, + 0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000, + 0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000, + 0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000, + 0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000, + 0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000, + 0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000, + 0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000, + 0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000, + 0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000, + 0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000, + 0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000, + 0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000, + 0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000, + 0x149f066100000000, 0xef839db200000000, 0x468814fc00000000, + 0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000, + 0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000, + 0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000, + 0xe718e5b300000000, 0x1c047e6000000000, 0xb50ff72e00000000, + 0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000, + 0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000, + 0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000, + 0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000, + 0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000, + 0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000, + 0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000, + 0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000, + 0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000, + 0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000, + 0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000, + 0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000, + 0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000, + 0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000, + 0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000, + 0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000, + 0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000, + 0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000, + 0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000, + 0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000, + 0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000, + 0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000, + 0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000, + 0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000, + 0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000, + 0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000, + 0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000, + 0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000, + 0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000, + 0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000, + 0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000, + 0x6284965c00000000, 0x99980d8f00000000, 0x309384c100000000, + 0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000, + 0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000, + 0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000, + 0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000, + 0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000, + 0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000, + 0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000, + 0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000, + 0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000, + 0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000, + 0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000, + 0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000, + 0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000, + 0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000, + 0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000, + 0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000, + 0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000, + 0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000, + 0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000, + 0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000, + 0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000, + 0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000, + 0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000, + 0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000, + 0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000, + 0x0650d0f700000000}, + {0x0000000000000000, 0x479244af00000000, 0xcf22f88500000000, + 0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000, + 0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000, + 0xb81337d500000000, 0x30a38bff00000000, 0x7731cf5000000000, + 0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000, + 0xa8724e8000000000, 0xfe03e7f400000000, 0xb991a35b00000000, + 0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000, + 0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000, + 0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000, + 0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000, + 0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000, + 0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000, + 0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000, + 0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000, + 0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000, + 0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000, + 0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000, + 0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000, + 0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000, + 0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000, + 0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000, + 0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000, + 0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000, + 0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000, + 0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000, + 0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000, + 0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000, + 0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000, + 0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000, + 0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000, + 0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000, + 0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000, + 0xc702c15700000000, 0x809085f800000000, 0x082039d200000000, + 0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000, + 0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000, + 0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000, + 0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000, + 0x6f708fd700000000, 0x390126a300000000, 0x7e93620c00000000, + 0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000, + 0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000, + 0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000, + 0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000, + 0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000, + 0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000, + 0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000, + 0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000, + 0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000, + 0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000, + 0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000, + 0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000, + 0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000, + 0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000, + 0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000, + 0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000, + 0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000, + 0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000, + 0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000, + 0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000, + 0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000, + 0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000, + 0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000, + 0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000, + 0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000, + 0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000, + 0x8e0582af00000000, 0xc997c60000000000, 0x41277a2a00000000, + 0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000, + 0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000, + 0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000, + 0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000, + 0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000, + 0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000, + 0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000, + 0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000, + 0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000, + 0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000, + 0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000, + 0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000, + 0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000, + 0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000, + 0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000, + 0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000, + 0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000, + 0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000, + 0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000, + 0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000, + 0x657594e900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873, + 0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661, + 0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441, + 0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44, + 0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1, + 0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05, + 0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa, + 0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e, + 0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb, + 0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be, + 0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e, + 0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c, + 0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d, + 0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9, + 0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f, + 0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b, + 0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39, + 0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b, + 0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b, + 0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20, + 0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595, + 0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61, + 0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0, + 0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644, + 0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1, + 0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d, + 0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d, + 0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f, + 0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad, + 0xd6997b2a, 0x52c2e179, 0x110947fe, 0x5a75d5de, 0x19be7359, + 0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f, + 0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b, + 0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7, + 0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5, + 0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5, + 0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0, + 0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65, + 0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091, + 0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633, + 0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7, + 0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272, + 0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77, + 0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57, + 0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145, + 0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9, + 0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d, + 0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb, + 0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f, + 0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad, + 0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf, + 0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f, + 0x4e36ba18}, + {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b, + 0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8, + 0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19, + 0x68a47ff6, 0x185ca32e, 0xf9eaf1c1, 0x9b37772b, 0x7a8125c4, + 0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239, + 0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd, + 0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258, + 0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc, + 0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41, + 0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 0xc952b79c, + 0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 0xad58997d, + 0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e, + 0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba, + 0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e, + 0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8, + 0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c, + 0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f, + 0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c, + 0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d, + 0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d, + 0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0, + 0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014, + 0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc, + 0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628, + 0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5, + 0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941, + 0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0, + 0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53, + 0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880, + 0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264, + 0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92, + 0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776, + 0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8, + 0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b, + 0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea, + 0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837, + 0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca, + 0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e, + 0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211, + 0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5, + 0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08, + 0x0a594de7, 0x6884cb0d, 0x893299e2, 0xf9ca453a, 0x187c17d5, + 0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934, + 0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7, + 0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049, + 0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad, + 0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b, + 0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf, + 0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c, + 0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f, + 0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e, + 0xa1d67c91}, + {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9, + 0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de, + 0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94, + 0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0, + 0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a, + 0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924, + 0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052, + 0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c, + 0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6, + 0x2be6af11, 0x8260f9b2, 0xdae01b65, 0x1e4dc635, 0x46cd24e2, + 0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8, + 0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f, + 0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d, + 0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273, + 0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30, + 0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e, + 0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7, + 0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980, + 0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca, + 0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8, + 0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62, + 0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 0x030aa75c, + 0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c, + 0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032, + 0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798, + 0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d, + 0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07, + 0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630, + 0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389, + 0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7, + 0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4, + 0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca, + 0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55, + 0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662, + 0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828, + 0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c, + 0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6, + 0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98, + 0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3, + 0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d, + 0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037, + 0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913, + 0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759, + 0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e, + 0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1, + 0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf, + 0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c, + 0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2, + 0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b, + 0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c, + 0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276, + 0xa8ef40a1}, + {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e, + 0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8, + 0x3ea01557, 0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819, + 0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f, + 0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d, + 0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756, + 0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0, + 0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb, + 0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9, + 0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f, + 0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e, + 0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8, + 0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835, + 0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e, + 0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62, + 0x6e3b8a07, 0xfd9dcee6, 0xbba6a983, 0x71eb002c, 0x37d06749, + 0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b, + 0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d, + 0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc, + 0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80, + 0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2, + 0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599, + 0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05, + 0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e, + 0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c, + 0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e, + 0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef, + 0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359, + 0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b, + 0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0, + 0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc, + 0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7, + 0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f, + 0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 0xd3fdb189, + 0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568, + 0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e, + 0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c, + 0xf6567769, 0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27, + 0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794, + 0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf, + 0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d, + 0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db, + 0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a, + 0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c, + 0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544, + 0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f, + 0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013, + 0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38, + 0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea, + 0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c, + 0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd, + 0x356bacd8}}; + +#endif + +#endif + +#if N == 6 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370, + 0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d, + 0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69, + 0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426, + 0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3, + 0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f, + 0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c, + 0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490, + 0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155, + 0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a, + 0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e, + 0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603, + 0x763c9017, 0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349, + 0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5, + 0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50, + 0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc, + 0x017ec20b, 0x3ccf2ed7, 0x7a1d1bb3, 0x47acf76f, 0xf7b9717b, + 0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76, + 0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862, + 0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 0xf1a9b4a9, + 0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c, + 0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0, + 0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937, + 0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b, + 0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e, + 0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e, + 0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a, + 0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357, + 0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0, + 0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c, + 0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9, + 0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165, + 0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766, + 0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b, + 0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f, + 0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030, + 0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5, + 0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59, + 0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63, + 0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf, + 0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a, + 0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845, + 0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51, + 0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 0xa856915c, + 0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f, + 0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3, + 0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46, + 0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea, + 0x0383461d, 0x3e32aac1, 0x78e09fa5, 0x45517379, 0xf544f56d, + 0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60, + 0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74, + 0x8568a0a8}, + {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5, + 0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf, + 0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5, + 0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba, + 0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf, + 0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f, + 0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0, + 0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450, + 0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55, + 0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a, + 0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620, + 0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a, + 0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454, + 0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4, + 0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534, + 0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584, + 0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694, + 0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e, + 0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4, + 0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1, + 0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4, + 0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164, + 0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1, + 0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911, + 0x1ad208c1, 0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314, + 0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 0xbb25458c, + 0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6, + 0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec, + 0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc, + 0x9a00ec99, 0x218bf136, 0x48f2fa53, 0x8deccc29, 0xe495c74c, + 0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c, + 0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c, + 0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716, + 0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c, + 0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676, + 0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879, + 0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c, + 0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc, + 0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77, + 0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7, + 0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2, + 0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd, + 0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7, + 0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad, + 0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897, + 0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827, + 0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7, + 0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947, + 0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57, + 0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d, + 0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37, + 0x0d907052}, + {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d, + 0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89, + 0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31, + 0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81, + 0xb50ac839, 0xcac353aa, 0x4a99ff1f, 0x3550648c, 0xd9f3702e, + 0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0, + 0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f, + 0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291, + 0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e, + 0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e, + 0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 0x33091583, 0xb353b936, + 0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2, + 0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13, + 0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d, + 0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f, + 0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1, + 0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a, + 0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae, + 0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516, + 0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f, + 0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20, + 0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe, + 0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28, + 0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6, + 0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419, + 0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5, + 0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d, + 0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889, + 0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412, + 0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c, + 0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e, + 0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 0x2c35a0b0, + 0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02, + 0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986, + 0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e, + 0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e, + 0x1d0b9a36, 0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221, + 0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf, + 0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913, + 0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d, + 0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622, + 0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592, + 0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 0x33b7612a, + 0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae, + 0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c, + 0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82, + 0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20, + 0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe, + 0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025, + 0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1, + 0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719, + 0xfd1a6c8a}, + {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3, + 0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb, + 0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d, + 0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb, + 0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9, + 0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156, + 0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045, + 0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa, + 0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8, + 0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e, + 0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8, + 0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0, + 0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38, + 0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87, + 0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46, + 0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9, + 0xdefe2e76, 0x7d68063a, 0x42a378af, 0xe13550e3, 0x3d358585, + 0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d, + 0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb, + 0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531, + 0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03, + 0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc, + 0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33, + 0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 0x7167bd8c, + 0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be, + 0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d, + 0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b, + 0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303, + 0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f, + 0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0, + 0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801, + 0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe, + 0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e, + 0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346, + 0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620, + 0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776, + 0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844, + 0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 0x03fa2bfb, + 0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0, + 0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f, + 0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d, + 0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b, + 0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d, + 0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75, + 0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795, + 0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a, + 0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb, + 0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354, + 0xb87374db, 0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28, + 0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30, + 0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856, + 0x7895f01a}, + {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188, + 0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33, + 0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d, + 0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445, + 0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 0x452049b2, + 0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058, + 0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43, + 0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9, + 0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e, + 0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06, + 0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228, + 0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93, + 0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e, + 0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4, + 0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b, + 0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371, + 0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265, + 0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede, + 0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0, + 0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f, + 0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8, + 0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32, + 0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae, + 0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544, + 0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3, + 0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f, + 0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911, + 0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa, + 0xeca15c84, 0xc68964e6, 0xb8f12c40, 0x92d91422, 0x0121f4be, + 0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54, + 0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b, + 0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1, + 0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652, + 0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9, + 0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7, + 0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f, + 0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68, + 0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782, + 0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797, + 0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d, + 0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a, + 0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2, + 0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc, + 0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 0x7fc94647, + 0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4, + 0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e, + 0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41, + 0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab, + 0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf, + 0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904, + 0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a, + 0x9239b848}, + {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad, + 0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0, + 0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40, + 0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b, + 0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d, + 0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b, + 0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb, + 0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d, + 0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b, + 0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0, + 0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840, + 0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d, + 0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b, + 0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d, + 0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6, + 0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0, + 0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580, + 0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd, + 0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d, + 0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b, + 0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d, + 0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b, + 0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6, + 0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0, + 0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6, + 0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c, + 0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c, + 0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461, + 0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841, + 0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317, + 0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac, + 0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa, + 0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7, + 0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba, + 0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a, + 0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161, + 0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777, + 0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21, + 0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a, + 0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc, + 0xc0669d77, 0x87756b8c, 0x4e417081, 0x0952867a, 0x075840da, + 0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1, + 0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01, + 0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c, + 0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241, + 0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917, + 0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac, + 0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa, + 0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da, + 0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 0xa22fe397, + 0x6b1bf89a, 0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537, + 0xeb36d3cc}, + {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b, + 0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059, + 0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251, + 0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d, + 0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9, + 0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c, + 0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41, + 0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4, + 0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10, + 0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c, + 0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54, + 0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476, + 0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8, + 0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d, + 0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92, + 0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307, + 0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad, + 0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f, + 0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87, + 0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17, + 0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3, + 0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46, + 0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197, + 0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02, + 0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6, + 0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e, + 0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96, + 0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4, + 0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e, + 0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b, + 0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934, + 0xbb259dda, 0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1, + 0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7, + 0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5, + 0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd, + 0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1, + 0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475, + 0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0, + 0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155, + 0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0, + 0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304, + 0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348, + 0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140, + 0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862, + 0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14, + 0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181, + 0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e, + 0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab, + 0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01, + 0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523, + 0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b, + 0x38e5f3c5}, + {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06, + 0xfeabdea7, 0x1de26905, 0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad, + 0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509, + 0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba, + 0x4c4d761e, 0xa45ae6bf, 0x4713511d, 0xaf04c1bc, 0x7789a414, + 0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3, + 0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733, + 0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994, + 0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a, + 0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889, + 0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d, + 0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386, + 0xd4d79a22, 0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621, + 0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886, + 0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e, + 0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389, + 0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f, + 0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294, + 0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30, + 0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3, + 0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d, + 0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba, + 0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a, + 0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad, + 0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03, + 0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2, + 0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306, + 0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad, + 0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b, + 0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc, + 0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914, + 0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3, + 0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435, + 0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e, + 0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a, + 0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589, + 0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27, + 0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080, + 0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21, + 0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586, + 0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28, + 0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b, + 0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f, + 0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94, + 0x6b9ac630, 0x838d5691, 0x60c4e133, 0x88d37192, 0xbf4d5c12, + 0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5, + 0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d, + 0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba, + 0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c, + 0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7, + 0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103, + 0x3d3101a2}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000, + 0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000, + 0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000, + 0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000, + 0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000, + 0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000, + 0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000, + 0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000, + 0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000, + 0xb613c09400000000, 0x12ea356100000000, 0xb37a228900000000, + 0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000, + 0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000, + 0x363e5ea300000000, 0x97ae494b00000000, 0x351900a800000000, + 0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000, + 0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000, + 0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000, + 0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000, + 0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000, + 0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000, + 0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000, + 0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000, + 0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000, + 0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000, + 0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000, + 0x2d7acd9d00000000, 0x8ceada7500000000, 0x2e5d939600000000, + 0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000, + 0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000, + 0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000, + 0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000, + 0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000, + 0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000, + 0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000, + 0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000, + 0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000, + 0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000, + 0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000, + 0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000, + 0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000, + 0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000, + 0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000, + 0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000, + 0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000, + 0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000, + 0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000, + 0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000, + 0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000, + 0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000, + 0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000, + 0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000, + 0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000, + 0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000, + 0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000, + 0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000, + 0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000, + 0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000, + 0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000, + 0x3f26802200000000, 0x9eb697ca00000000, 0x3c01de2900000000, + 0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000, + 0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000, + 0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000, + 0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000, + 0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000, + 0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000, + 0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000, + 0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000, + 0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000, + 0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000, + 0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000, + 0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000, + 0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000, + 0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000, + 0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000, + 0x3688267d00000000, 0x9718319500000000, 0x35af787600000000, + 0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000, + 0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000, + 0xb3cc5a5700000000, 0x117b13b400000000, 0xb0eb045c00000000, + 0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000, + 0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000, + 0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000, + 0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000, + 0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000, + 0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000, + 0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000, + 0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000, + 0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000, + 0xa201313d00000000}, + {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000, + 0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000, + 0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000, + 0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000, + 0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000, + 0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000, + 0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000, + 0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000, + 0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000, + 0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000, + 0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000, + 0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000, + 0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000, + 0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000, + 0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000, + 0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000, + 0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000, + 0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000, + 0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000, + 0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000, + 0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000, + 0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000, + 0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000, + 0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000, + 0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000, + 0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000, + 0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000, + 0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000, + 0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000, + 0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000, + 0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000, + 0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000, + 0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000, + 0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000, + 0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000, + 0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000, + 0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000, + 0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000, + 0x971181e000000000, 0x7995b84100000000, 0x7107f69000000000, + 0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000, + 0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000, + 0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000, + 0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000, + 0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000, + 0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000, + 0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000, + 0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000, + 0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000, + 0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000, + 0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000, + 0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000, + 0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000, + 0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000, + 0xa184505200000000, 0xac99dfbf00000000, 0x421de61e00000000, + 0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000, + 0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000, + 0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000, + 0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000, + 0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000, + 0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000, + 0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000, + 0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000, + 0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000, + 0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000, + 0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000, + 0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000, + 0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000, + 0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000, + 0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000, + 0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000, + 0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000, + 0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000, + 0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000, + 0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000, + 0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000, + 0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000, + 0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000, + 0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000, + 0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000, + 0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000, + 0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000, + 0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000, + 0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000, + 0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000, + 0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000, + 0xc5f3e53800000000}, + {0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000, + 0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000, + 0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000, + 0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000, + 0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000, + 0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000, + 0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000, + 0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000, + 0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000, + 0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000, + 0x76f70cb600000000, 0x8d011ff100000000, 0x2df2438f00000000, + 0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000, + 0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000, + 0x8d34498100000000, 0x364f4fda00000000, 0xcdb95c9d00000000, + 0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000, + 0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000, + 0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000, + 0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000, + 0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000, + 0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000, + 0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000, + 0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000, + 0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000, + 0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000, + 0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000, + 0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000, + 0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000, + 0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000, + 0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000, + 0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000, + 0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000, + 0x600eceba00000000, 0x6d15fa7300000000, 0x96e3e93400000000, + 0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000, + 0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000, + 0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000, + 0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000, + 0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000, + 0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000, + 0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000, + 0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000, + 0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000, + 0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000, + 0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000, + 0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000, + 0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000, + 0xd704965e00000000, 0x6c7f900500000000, 0x9789834200000000, + 0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000, + 0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000, + 0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000, + 0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000, + 0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000, + 0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000, + 0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000, + 0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000, + 0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000, + 0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000, + 0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000, + 0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000, + 0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000, + 0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000, + 0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000, + 0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000, + 0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000, + 0x21dce7a000000000, 0x2cc7d36900000000, 0xd731c02e00000000, + 0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000, + 0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000, + 0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000, + 0x8c6b758700000000, 0x8170414e00000000, 0x7a86520900000000, + 0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000, + 0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000, + 0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000, + 0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000, + 0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000, + 0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000, + 0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000, + 0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000, + 0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000, + 0xe114231400000000, 0x5a6f254f00000000, 0xa199360800000000, + 0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000, + 0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000, + 0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000, + 0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000, + 0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000, + 0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000, + 0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000, + 0xccd336eb00000000}, + {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000, + 0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000, + 0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000, + 0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000, + 0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000, + 0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000, + 0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000, + 0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000, + 0xb249204500000000, 0xd071086f00000000, 0x7639701100000000, + 0x1401583b00000000, 0x3aa880ed00000000, 0x5890a8c700000000, + 0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000, + 0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000, + 0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000, + 0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000, + 0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000, + 0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000, + 0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000, + 0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000, + 0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000, + 0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000, + 0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000, + 0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000, + 0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000, + 0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000, + 0x5ffa900000000000, 0x3dc2b82a00000000, 0x9b8ac05400000000, + 0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000, + 0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000, + 0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000, + 0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000, + 0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000, + 0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000, + 0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000, + 0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000, + 0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000, + 0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000, + 0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000, + 0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000, + 0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000, + 0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000, + 0x8075688200000000, 0x263d10fc00000000, 0x440538d600000000, + 0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000, + 0x9d21a8f400000000, 0xb388702200000000, 0xd1b0580800000000, + 0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000, + 0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000, + 0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000, + 0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000, + 0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000, + 0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000, + 0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000, + 0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000, + 0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000, + 0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000, + 0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000, + 0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000, + 0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000, + 0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000, + 0x8ba2510100000000, 0xe99a792b00000000, 0x4fd2015500000000, + 0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000, + 0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000, + 0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000, + 0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000, + 0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000, + 0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000, + 0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000, + 0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000, + 0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000, + 0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000, + 0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000, + 0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000, + 0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000, + 0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000, + 0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000, + 0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000, + 0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000, + 0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000, + 0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000, + 0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000, + 0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000, + 0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000, + 0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000, + 0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000, + 0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000, + 0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000, + 0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000, + 0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000, + 0x48b8399200000000}, + {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000, + 0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000, + 0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000, + 0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000, + 0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000, + 0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000, + 0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000, + 0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000, + 0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000, + 0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000, + 0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000, + 0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000, + 0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000, + 0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000, + 0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000, + 0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000, + 0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000, + 0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000, + 0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000, + 0x399c25f500000000, 0xace2eeca00000000, 0xe0ca786900000000, + 0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000, + 0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000, + 0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000, + 0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000, + 0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000, + 0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000, + 0xb521428400000000, 0xf909d42700000000, 0x762efede00000000, + 0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000, + 0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000, + 0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000, + 0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000, + 0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000, + 0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000, + 0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000, + 0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000, + 0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000, + 0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000, + 0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000, + 0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000, + 0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000, + 0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000, + 0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000, + 0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000, + 0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000, + 0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000, + 0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000, + 0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000, + 0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000, + 0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000, + 0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000, + 0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000, + 0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000, + 0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000, + 0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000, + 0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000, + 0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000, + 0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000, + 0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000, + 0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000, + 0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000, + 0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000, + 0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000, + 0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000, + 0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000, + 0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000, + 0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000, + 0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000, + 0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000, + 0x5d6b0b0d00000000, 0x11439dae00000000, 0x843d569100000000, + 0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000, + 0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000, + 0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000, + 0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000, + 0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000, + 0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000, + 0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000, + 0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000, + 0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000, + 0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000, + 0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000, + 0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000, + 0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000, + 0xf189e5c700000000, 0xbda1736400000000, 0x7c2595a400000000, + 0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000, + 0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000, + 0x1af0957800000000}, + {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000, + 0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000, + 0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000, + 0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000, + 0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000, + 0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000, + 0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000, + 0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000, + 0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000, + 0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000, + 0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000, + 0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000, + 0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000, + 0x9182c75d00000000, 0x339664b100000000, 0xa00dadce00000000, + 0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000, + 0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000, + 0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000, + 0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000, + 0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000, + 0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000, + 0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000, + 0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000, + 0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000, + 0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000, + 0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000, + 0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000, + 0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000, + 0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000, + 0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000, + 0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000, + 0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000, + 0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000, + 0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000, + 0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000, + 0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000, + 0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000, + 0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000, + 0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000, + 0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000, + 0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000, + 0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000, + 0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000, + 0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000, + 0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000, + 0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000, + 0x9ed0c6f500000000, 0x3cc4651900000000, 0xaf5fac6600000000, + 0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000, + 0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000, + 0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000, + 0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000, + 0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000, + 0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000, + 0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000, + 0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000, + 0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000, + 0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000, + 0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000, + 0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000, + 0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000, + 0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000, + 0x369a0b1d00000000, 0xa501c26200000000, 0x10ad98e200000000, + 0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000, + 0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000, + 0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000, + 0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000, + 0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000, + 0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000, + 0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000, + 0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000, + 0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000, + 0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000, + 0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000, + 0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000, + 0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000, + 0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000, + 0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000, + 0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000, + 0xa41ce92400000000, 0x06084ac800000000, 0x959383b700000000, + 0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000, + 0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000, + 0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000, + 0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000, + 0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000, + 0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000, + 0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000, + 0x8a6c1afd00000000}, + {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000, + 0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000, + 0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000, + 0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000, + 0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000, + 0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000, + 0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000, + 0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000, + 0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000, + 0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000, + 0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000, + 0x4f59322b00000000, 0xe044b99000000000, 0x854fc0f900000000, + 0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000, + 0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000, + 0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000, + 0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000, + 0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000, + 0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000, + 0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000, + 0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000, + 0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000, + 0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000, + 0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000, + 0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000, + 0xfef3bc7900000000, 0x9bf8c51000000000, 0x34e54eab00000000, + 0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000, + 0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000, + 0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000, + 0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000, + 0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000, + 0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000, + 0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000, + 0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000, + 0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000, + 0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000, + 0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000, + 0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000, + 0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000, + 0xa1490a3500000000, 0xc442735c00000000, 0xbe746d9900000000, + 0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000, + 0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000, + 0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000, + 0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000, + 0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000, + 0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000, + 0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000, + 0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000, + 0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000, + 0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000, + 0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000, + 0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000, + 0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000, + 0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000, + 0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000, + 0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000, + 0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000, + 0x694b3d6d00000000, 0x0c40440400000000, 0xa35dcfbf00000000, + 0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000, + 0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000, + 0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000, + 0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000, + 0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000, + 0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000, + 0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000, + 0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000, + 0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000, + 0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000, + 0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000, + 0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000, + 0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000, + 0x62551da500000000, 0x075e64cc00000000, 0x7d687a0900000000, + 0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000, + 0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000, + 0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000, + 0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000, + 0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000, + 0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000, + 0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000, + 0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000, + 0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000, + 0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000, + 0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000, + 0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000, + 0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000, + 0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000, + 0x5270900d00000000}, + {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000, + 0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000, + 0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000, + 0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000, + 0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000, + 0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000, + 0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000, + 0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000, + 0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000, + 0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000, + 0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000, + 0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000, + 0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000, + 0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000, + 0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000, + 0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000, + 0xc64305b600000000, 0x1aafb48b00000000, 0x7e9a66cd00000000, + 0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000, + 0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000, + 0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000, + 0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000, + 0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000, + 0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000, + 0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000, + 0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000, + 0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000, + 0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000, + 0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000, + 0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000, + 0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000, + 0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000, + 0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000, + 0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000, + 0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000, + 0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000, + 0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000, + 0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000, + 0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000, + 0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000, + 0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000, + 0x2e2079ec00000000, 0xf2ccc8d100000000, 0x96f91a9700000000, + 0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000, + 0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000, + 0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000, + 0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000, + 0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000, + 0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000, + 0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000, + 0xd0c7f8b400000000, 0x0c2b498900000000, 0x681e9bcf00000000, + 0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000, + 0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000, + 0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000, + 0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000, + 0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000, + 0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000, + 0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000, + 0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000, + 0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000, + 0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000, + 0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000, + 0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000, + 0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000, + 0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000, + 0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000, + 0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000, + 0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000, + 0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000, + 0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000, + 0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000, + 0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000, + 0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000, + 0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000, + 0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000, + 0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000, + 0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000, + 0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000, + 0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000, + 0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000, + 0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000, + 0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000, + 0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000, + 0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000, + 0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000, + 0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000, + 0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000, + 0xa8a0688500000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912, + 0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba, + 0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3, + 0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30, + 0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e, + 0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3, + 0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73, + 0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe, + 0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 0x1528c3a0, + 0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643, + 0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a, + 0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082, + 0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4, + 0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279, + 0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735, + 0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8, + 0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad, + 0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05, + 0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c, + 0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718, + 0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46, + 0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb, + 0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc, + 0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41, + 0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f, + 0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad, + 0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4, + 0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c, + 0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779, + 0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4, + 0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8, + 0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235, + 0x7bb87ba5, 0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7, + 0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f, + 0x70b04d64, 0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476, + 0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195, + 0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb, + 0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46, + 0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622, + 0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af, + 0x24d9a0e3, 0xba5a7a7c, 0x59d86407, 0xc75bbe98, 0x9fdd59f1, + 0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12, + 0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b, + 0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3, + 0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51, + 0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc, + 0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90, + 0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d, + 0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708, + 0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0, + 0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9, + 0x48686b56}, + {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c, + 0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae, + 0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb, + 0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90, + 0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410, + 0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b, + 0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6, + 0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed, + 0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d, + 0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036, + 0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953, + 0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1, + 0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca, + 0x074766dd, 0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781, + 0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d, + 0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416, + 0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f, + 0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd, + 0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8, + 0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b, + 0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 0x049792fb, + 0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0, + 0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5, + 0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e, + 0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e, + 0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558, + 0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d, + 0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf, + 0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6, + 0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad, + 0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971, + 0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a, + 0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b, + 0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969, + 0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c, + 0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57, + 0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7, + 0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c, + 0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab, + 0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0, + 0x0844e03c, 0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160, + 0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b, + 0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e, + 0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac, + 0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d, + 0x0b94141a, 0x4b396c23, 0x8b5d4434, 0xcb639d51, 0x0b07b546, + 0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a, + 0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1, + 0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8, + 0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a, + 0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f, + 0xcaa25178}, + {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00, + 0x474b2f9b, 0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b, + 0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed, + 0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777, + 0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01, + 0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a, + 0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef, + 0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74, + 0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002, + 0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498, + 0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee, + 0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75, + 0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05, + 0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e, + 0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8, + 0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73, + 0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404, + 0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f, + 0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9, + 0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71, + 0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607, + 0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c, + 0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb, + 0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470, + 0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806, + 0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790, + 0x89e102e6, 0x5d0b717d, 0xf393c80b, 0x2779bb90, 0x1a405ee6, + 0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d, + 0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a, + 0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991, + 0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7, + 0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c, + 0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 0x6b9f2d09, + 0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92, + 0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4, + 0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e, + 0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08, + 0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593, + 0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3, + 0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778, + 0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e, + 0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94, + 0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2, + 0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079, + 0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c, + 0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497, + 0xb25571e1, 0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1, + 0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a, + 0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d, + 0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396, + 0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0, + 0x0c7ac97b}, + {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669, + 0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853, + 0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062, + 0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527, + 0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad, + 0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545, + 0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27, + 0x5257c8a6, 0x2560d097, 0x0205f516, 0x2aaceb4e, 0x0dc9cecf, + 0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45, + 0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800, + 0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031, + 0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b, + 0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26, + 0x929ff9a7, 0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce, + 0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d, + 0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5, + 0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130, + 0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a, + 0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b, + 0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480, + 0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a, + 0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2, + 0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e, + 0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996, + 0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c, + 0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc, + 0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd, + 0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7, + 0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232, + 0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da, + 0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439, + 0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1, + 0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da, + 0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0, + 0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1, + 0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94, + 0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e, + 0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6, + 0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2, + 0xf7a67b33, 0x80916302, 0xa7f44683, 0x8f5d58db, 0xa8387d5a, + 0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0, + 0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95, + 0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4, + 0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e, + 0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 0xa5f1b395, + 0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d, + 0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e, + 0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676, + 0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83, + 0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9, + 0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888, + 0x5185cd09}}; + +#endif + +#endif + +#endif + +local const z_crc_t FAR x2n_table[] = { + 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, + 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, + 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, + 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, + 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, + 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, + 0xc40ba6d0, 0xc4e22c3c}; diff --git a/3rdparty/zlib/deflate.c b/3rdparty/zlib/deflate.c index 1ec761448d..799fb93cc0 100644 --- a/3rdparty/zlib/deflate.c +++ b/3rdparty/zlib/deflate.c @@ -1,5 +1,5 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -52,7 +52,7 @@ #include "deflate.h" const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; + " deflate 1.2.12 Copyright 1995-2022 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -190,8 +190,11 @@ local const config configuration_table[10] = { * prev[] will be initialized on the fly. */ #define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + do { \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size-1)*sizeof(*s->head)); \ + } while (0) /* =========================================================================== * Slide the hash table when sliding the window down (could be avoided with 32 @@ -252,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, int wrap = 1; static const char my_version[] = ZLIB_VERSION; - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits. - */ - if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { return Z_VERSION_ERROR; @@ -326,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + /* We overlay pending_buf and sym_buf. This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits gets to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n-2) bits have been written, just + * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 + * symbols are written.) The closest the writing gets to what is unread is + * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and + * can range from 128 to 32768. + * + * Therefore, at a minimum, there are 142 bits of space between what is + * written and what is read in the overlain buffers, so the symbols cannot + * be overwritten by the compressed data. That space is actually 139 bits, + * due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. + */ + + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); + s->pending_buf_size = (ulg)s->lit_bufsize * 4; if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) { @@ -337,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, deflateEnd (strm); return Z_MEM_ERROR; } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + s->sym_buf = s->pending_buf + s->lit_bufsize; + s->sym_end = (s->lit_bufsize - 1) * 3; + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ s->level = level; s->strategy = strategy; @@ -488,13 +528,13 @@ int ZEXPORT deflateResetKeep (strm) #ifdef GZIP s->wrap == 2 ? GZIP_STATE : #endif - s->wrap ? INIT_STATE : BUSY_STATE; + INIT_STATE; strm->adler = #ifdef GZIP s->wrap == 2 ? crc32(0L, Z_NULL, 0) : #endif adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; + s->last_flush = -2; _tr_init(s); @@ -549,7 +589,8 @@ int ZEXPORT deflatePrime (strm, bits, value) if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; - if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + if (bits < 0 || bits > 16 || + s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR; do { put = Buf_size - s->bi_valid; @@ -587,12 +628,12 @@ int ZEXPORT deflateParams(strm, level, strategy) func = configuration_table[s->level].func; if ((strategy != s->strategy || func != configuration_table[level].func) && - s->high_water) { + s->last_flush != -2) { /* Flush the last buffer: */ int err = deflate(strm, Z_BLOCK); if (err == Z_STREAM_ERROR) return err; - if (strm->avail_out == 0) + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) return Z_BUF_ERROR; } if (s->level != level) { @@ -811,6 +852,8 @@ int ZEXPORT deflate (strm, flush) } /* Write the header */ + if (s->status == INIT_STATE && s->wrap == 0) + s->status = BUSY_STATE; if (s->status == INIT_STATE) { /* zlib header */ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; @@ -1108,7 +1151,6 @@ int ZEXPORT deflateCopy (dest, source) #else deflate_state *ds; deflate_state *ss; - ushf *overlay; if (deflateStateCheck(source) || dest == Z_NULL) { @@ -1128,8 +1170,7 @@ int ZEXPORT deflateCopy (dest, source) ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ds->pending_buf == Z_NULL) { @@ -1143,8 +1184,7 @@ int ZEXPORT deflateCopy (dest, source) zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + ds->sym_buf = ds->pending_buf + ds->lit_bufsize; ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; @@ -1513,6 +1553,8 @@ local void fill_window(s) s->match_start -= wsize; s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ s->block_start -= (long) wsize; + if (s->insert > s->strstart) + s->insert = s->strstart; slide_hash(s); more += wsize; } @@ -1742,6 +1784,7 @@ local block_state deflate_stored(s, flush) s->matches = 2; /* clear hash */ zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); s->strstart = s->w_size; + s->insert = s->strstart; } else { if (s->window_size - s->strstart <= used) { @@ -1750,12 +1793,14 @@ local block_state deflate_stored(s, flush) zmemcpy(s->window, s->window + s->w_size, s->strstart); if (s->matches < 2) s->matches++; /* add a pending slide_hash() */ + if (s->insert > s->strstart) + s->insert = s->strstart; } zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); s->strstart += used; + s->insert += MIN(used, s->w_size - s->insert); } s->block_start = s->strstart; - s->insert += MIN(used, s->w_size - s->insert); } if (s->high_water < s->strstart) s->high_water = s->strstart; @@ -1770,7 +1815,7 @@ local block_state deflate_stored(s, flush) return block_done; /* Fill the window with any remaining input. */ - have = s->window_size - s->strstart - 1; + have = s->window_size - s->strstart; if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { /* Slide the window down. */ s->block_start -= s->w_size; @@ -1779,12 +1824,15 @@ local block_state deflate_stored(s, flush) if (s->matches < 2) s->matches++; /* add a pending slide_hash() */ have += s->w_size; /* more space now */ + if (s->insert > s->strstart) + s->insert = s->strstart; } if (have > s->strm->avail_in) have = s->strm->avail_in; if (have) { read_buf(s->strm, s->window + s->strstart, have); s->strstart += have; + s->insert += MIN(have, s->w_size - s->insert); } if (s->high_water < s->strstart) s->high_water = s->strstart; @@ -1912,7 +1960,7 @@ local block_state deflate_fast(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2043,7 +2091,7 @@ local block_state deflate_slow(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2118,7 +2166,7 @@ local block_state deflate_rle(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } @@ -2157,7 +2205,7 @@ local block_state deflate_huff(s, flush) FLUSH_BLOCK(s, 1); return finish_done; } - if (s->last_lit) + if (s->sym_next) FLUSH_BLOCK(s, 0); return block_done; } diff --git a/3rdparty/zlib/deflate.h b/3rdparty/zlib/deflate.h index 23ecdd312b..17c226113b 100644 --- a/3rdparty/zlib/deflate.h +++ b/3rdparty/zlib/deflate.h @@ -1,5 +1,5 @@ /* deflate.h -- internal compression state - * Copyright (C) 1995-2016 Jean-loup Gailly + * Copyright (C) 1995-2018 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -217,7 +217,7 @@ typedef struct internal_state { /* Depth of each subtree used as tie breaker for trees of equal frequency */ - uchf *l_buf; /* buffer for literals or lengths */ + uchf *sym_buf; /* buffer for distances and literals/lengths */ uInt lit_bufsize; /* Size of match buffer for literals/lengths. There are 4 reasons for @@ -239,13 +239,8 @@ typedef struct internal_state { * - I can't count above 4 */ - uInt last_lit; /* running index in l_buf */ - - ushf *d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ + uInt sym_next; /* running index in sym_buf */ + uInt sym_end; /* symbol table full when sym_next reaches this */ ulg opt_len; /* bit length of current block with optimal trees */ ulg static_len; /* bit length of current block with static trees */ @@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, # define _tr_tally_lit(s, c, flush) \ { uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = cc; \ s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } # define _tr_tally_dist(s, distance, length, flush) \ { uch len = (uch)(length); \ ush dist = (ush)(distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ + s->sym_buf[s->sym_next++] = dist; \ + s->sym_buf[s->sym_next++] = dist >> 8; \ + s->sym_buf[s->sym_next++] = len; \ dist--; \ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } #else # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) diff --git a/3rdparty/zlib/gzguts.h b/3rdparty/zlib/gzguts.h index 990a4d2514..57faf37165 100644 --- a/3rdparty/zlib/gzguts.h +++ b/3rdparty/zlib/gzguts.h @@ -1,5 +1,5 @@ /* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -39,7 +39,7 @@ # include #endif -#if defined(_WIN32) || defined(__CYGWIN__) +#if defined(_WIN32) # define WIDECHAR #endif @@ -190,6 +190,7 @@ typedef struct { /* just for writing */ int level; /* compression level */ int strategy; /* compression strategy */ + int reset; /* true if a reset is pending after a Z_FINISH */ /* seek request */ z_off64_t skip; /* amount to skip (already rewound if backwards) */ int seek; /* true if seek request pending */ diff --git a/3rdparty/zlib/gzlib.c b/3rdparty/zlib/gzlib.c index 4105e6aff9..dddaf26873 100644 --- a/3rdparty/zlib/gzlib.c +++ b/3rdparty/zlib/gzlib.c @@ -1,11 +1,11 @@ /* gzlib.c -- zlib functions common to reading and writing gzip files - * Copyright (C) 2004-2017 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ #include "gzguts.h" -#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__) +#if defined(_WIN32) && !defined(__BORLANDC__) # define LSEEK _lseeki64 #else #if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 @@ -81,6 +81,8 @@ local void gz_reset(state) state->past = 0; /* have not read past end yet */ state->how = LOOK; /* look for gzip header */ } + else /* for writing ... */ + state->reset = 0; /* no deflateReset pending */ state->seek = 0; /* no seek request pending */ gz_error(state, Z_OK, NULL); /* clear error */ state->x.pos = 0; /* no uncompressed data yet */ @@ -397,7 +399,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence) /* if within raw area while reading, just go there */ if (state->mode == GZ_READ && state->how == COPY && state->x.pos + offset >= 0) { - ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR); + ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR); if (ret == -1) return -1; state->x.have = 0; diff --git a/3rdparty/zlib/gzread.c b/3rdparty/zlib/gzread.c index e75cae64dc..884c9bfe4c 100644 --- a/3rdparty/zlib/gzread.c +++ b/3rdparty/zlib/gzread.c @@ -1,5 +1,5 @@ /* gzread.c -- zlib functions for reading gzip files - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler + * Copyright (C) 2004-2017 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -314,9 +314,9 @@ local z_size_t gz_read(state, buf, len) got = 0; do { /* set n to the maximum amount of len that fits in an unsigned int */ - n = -1; + n = (unsigned)-1; if (n > len) - n = (int)len; + n = (unsigned)len; /* first just try copying data from the output buffer */ if (state->x.have) { @@ -397,7 +397,7 @@ int ZEXPORT gzread(file, buf, len) } /* read len or fewer bytes to buf */ - len = (int)gz_read(state, buf, len); + len = (unsigned)gz_read(state, buf, len); /* check for an error */ if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) @@ -447,7 +447,6 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file) int ZEXPORT gzgetc(file) gzFile file; { - int ret; unsigned char buf[1]; gz_statep state; @@ -469,8 +468,7 @@ int ZEXPORT gzgetc(file) } /* nothing there -- try gz_read() */ - ret = (int)gz_read(state, buf, 1); - return ret < 1 ? -1 : buf[0]; + return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; } int ZEXPORT gzgetc_(file) diff --git a/3rdparty/zlib/gzwrite.c b/3rdparty/zlib/gzwrite.c index 57a1cc70c8..a8ffc8f53d 100644 --- a/3rdparty/zlib/gzwrite.c +++ b/3rdparty/zlib/gzwrite.c @@ -1,5 +1,5 @@ /* gzwrite.c -- zlib functions for writing gzip files - * Copyright (C) 2004-2017 Mark Adler + * Copyright (C) 2004-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -97,6 +97,15 @@ local int gz_comp(state, flush) return 0; } + /* check for a pending reset */ + if (state->reset) { + /* don't start a new gzip member unless there is data to write */ + if (strm->avail_in == 0) + return 0; + deflateReset(strm); + state->reset = 0; + } + /* run deflate() on provided input until it produces no more output */ ret = Z_OK; do { @@ -134,7 +143,7 @@ local int gz_comp(state, flush) /* if that completed a deflate stream, allow another to start */ if (flush == Z_FINISH) - deflateReset(strm); + state->reset = 1; /* all done, no errors */ return 0; @@ -209,7 +218,7 @@ local z_size_t gz_write(state, buf, len) state->in); copy = state->size - have; if (copy > len) - copy = (int)len; + copy = (unsigned)len; memcpy(state->in + have, buf, copy); state->strm.avail_in += copy; state->x.pos += copy; @@ -229,7 +238,7 @@ local z_size_t gz_write(state, buf, len) do { unsigned n = (unsigned)-1; if (n > len) - n = (int)len; + n = (unsigned)len; state->strm.avail_in = n; state->x.pos += n; if (gz_comp(state, Z_NO_FLUSH) == -1) @@ -349,12 +358,11 @@ int ZEXPORT gzputc(file, c) } /* -- see zlib.h -- */ -int ZEXPORT gzputs(file, str) +int ZEXPORT gzputs(file, s) gzFile file; - const char *str; + const char *s; { - int ret; - z_size_t len; + z_size_t len, put; gz_statep state; /* get internal structure */ @@ -367,9 +375,13 @@ int ZEXPORT gzputs(file, str) return -1; /* write string */ - len = strlen(str); - ret = (int)gz_write(state, str, len); - return ret == 0 && len != 0 ? -1 : ret; + len = strlen(s); + if ((int)len < 0 || (unsigned)len != len) { + gz_error(state, Z_STREAM_ERROR, "string length does not fit in int"); + return -1; + } + put = gz_write(state, s, len); + return put < len ? -1 : (int)len; } #if defined(STDC) || defined(Z_HAVE_STDARG_H) @@ -441,7 +453,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) strm->avail_in = state->size; if (gz_comp(state, Z_NO_FLUSH) == -1) return state->err; - memcpy(state->in, state->in + state->size, left); + memmove(state->in, state->in + state->size, left); strm->next_in = state->in; strm->avail_in = left; } @@ -540,7 +552,7 @@ int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, strm->avail_in = state->size; if (gz_comp(state, Z_NO_FLUSH) == -1) return state->err; - memcpy(state->in, state->in + state->size, left); + memmove(state->in, state->in + state->size, left); strm->next_in = state->in; strm->avail_in = left; } diff --git a/3rdparty/zlib/infback.c b/3rdparty/zlib/infback.c index 59679ecbfc..a390c58e81 100644 --- a/3rdparty/zlib/infback.c +++ b/3rdparty/zlib/infback.c @@ -1,5 +1,5 @@ /* infback.c -- inflate using a call-back interface - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -477,6 +477,7 @@ void FAR *out_desc; } Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN; + /* fallthrough */ case LEN: /* use inflate_fast() if we have enough input and output */ diff --git a/3rdparty/zlib/inffast.c b/3rdparty/zlib/inffast.c index 0dbd1dbc09..1fec7f363f 100644 --- a/3rdparty/zlib/inffast.c +++ b/3rdparty/zlib/inffast.c @@ -70,7 +70,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ + code const *here; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -107,20 +107,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(*in++) << bits; bits += 8; } - here = lcode[hold & lmask]; + here = lcode + (hold & lmask); dolen: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); + len = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { @@ -138,14 +138,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(*in++) << bits; bits += 8; } - here = dcode[hold & dmask]; + here = dcode + (hold & dmask); dodist: - op = (unsigned)(here.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(here->op); if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); + dist = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (bits < op) { hold += (unsigned long)(*in++) << bits; @@ -264,7 +264,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; + here = dcode + here->val + (hold & ((1U << op) - 1)); goto dodist; } else { @@ -274,7 +274,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; + here = lcode + here->val + (hold & ((1U << op) - 1)); goto dolen; } else if (op & 32) { /* end-of-block */ diff --git a/3rdparty/zlib/inflate.c b/3rdparty/zlib/inflate.c index 19a2cf2ed8..0e7c4f26b1 100644 --- a/3rdparty/zlib/inflate.c +++ b/3rdparty/zlib/inflate.c @@ -1,5 +1,5 @@ /* inflate.c -- zlib decompression - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -130,6 +130,7 @@ z_streamp strm; state->mode = HEAD; state->last = 0; state->havedict = 0; + state->flags = -1; state->dmax = 32768U; state->head = Z_NULL; state->hold = 0; @@ -448,10 +449,10 @@ unsigned copy; /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP -# define UPDATE(check, buf, len) \ +# define UPDATE_CHECK(check, buf, len) \ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) #else -# define UPDATE(check, buf, len) adler32(check, buf, len) +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) #endif /* check macros for header crc */ @@ -671,7 +672,6 @@ int flush; state->mode = FLAGS; break; } - state->flags = 0; /* expect zlib header */ if (state->head != Z_NULL) state->head->done = -1; if (!(state->wrap & 1) || /* check if zlib header allowed */ @@ -698,6 +698,7 @@ int flush; break; } state->dmax = 1U << len; + state->flags = 0; /* indicate zlib header */ Tracev((stderr, "inflate: zlib header ok\n")); strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = hold & 0x200 ? DICTID : TYPE; @@ -723,6 +724,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = TIME; + /* fallthrough */ case TIME: NEEDBITS(32); if (state->head != Z_NULL) @@ -731,6 +733,7 @@ int flush; CRC4(state->check, hold); INITBITS(); state->mode = OS; + /* fallthrough */ case OS: NEEDBITS(16); if (state->head != Z_NULL) { @@ -741,6 +744,7 @@ int flush; CRC2(state->check, hold); INITBITS(); state->mode = EXLEN; + /* fallthrough */ case EXLEN: if (state->flags & 0x0400) { NEEDBITS(16); @@ -754,6 +758,7 @@ int flush; else if (state->head != Z_NULL) state->head->extra = Z_NULL; state->mode = EXTRA; + /* fallthrough */ case EXTRA: if (state->flags & 0x0400) { copy = state->length; @@ -776,6 +781,7 @@ int flush; } state->length = 0; state->mode = NAME; + /* fallthrough */ case NAME: if (state->flags & 0x0800) { if (have == 0) goto inf_leave; @@ -797,6 +803,7 @@ int flush; state->head->name = Z_NULL; state->length = 0; state->mode = COMMENT; + /* fallthrough */ case COMMENT: if (state->flags & 0x1000) { if (have == 0) goto inf_leave; @@ -817,6 +824,7 @@ int flush; else if (state->head != Z_NULL) state->head->comment = Z_NULL; state->mode = HCRC; + /* fallthrough */ case HCRC: if (state->flags & 0x0200) { NEEDBITS(16); @@ -840,6 +848,7 @@ int flush; strm->adler = state->check = ZSWAP32(hold); INITBITS(); state->mode = DICT; + /* fallthrough */ case DICT: if (state->havedict == 0) { RESTORE(); @@ -847,8 +856,10 @@ int flush; } strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = TYPE; + /* fallthrough */ case TYPE: if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ case TYPEDO: if (state->last) { BYTEBITS(); @@ -899,8 +910,10 @@ int flush; INITBITS(); state->mode = COPY_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case COPY_: state->mode = COPY; + /* fallthrough */ case COPY: copy = state->length; if (copy) { @@ -936,6 +949,7 @@ int flush; Tracev((stderr, "inflate: table sizes ok\n")); state->have = 0; state->mode = LENLENS; + /* fallthrough */ case LENLENS: while (state->have < state->ncode) { NEEDBITS(3); @@ -957,6 +971,7 @@ int flush; Tracev((stderr, "inflate: code lengths ok\n")); state->have = 0; state->mode = CODELENS; + /* fallthrough */ case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { @@ -1040,8 +1055,10 @@ int flush; Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN_; if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ case LEN_: state->mode = LEN; + /* fallthrough */ case LEN: if (have >= 6 && left >= 258) { RESTORE(); @@ -1091,6 +1108,7 @@ int flush; } state->extra = (unsigned)(here.op) & 15; state->mode = LENEXT; + /* fallthrough */ case LENEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1101,6 +1119,7 @@ int flush; Tracevv((stderr, "inflate: length %u\n", state->length)); state->was = state->length; state->mode = DIST; + /* fallthrough */ case DIST: for (;;) { here = state->distcode[BITS(state->distbits)]; @@ -1128,6 +1147,7 @@ int flush; state->offset = (unsigned)here.val; state->extra = (unsigned)(here.op) & 15; state->mode = DISTEXT; + /* fallthrough */ case DISTEXT: if (state->extra) { NEEDBITS(state->extra); @@ -1144,6 +1164,7 @@ int flush; #endif Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; + /* fallthrough */ case MATCH: if (left == 0) goto inf_leave; copy = out - left; @@ -1203,7 +1224,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, put - out, out); + UPDATE_CHECK(state->check, put - out, out); out = left; if ((state->wrap & 4) && ( #ifdef GUNZIP @@ -1219,10 +1240,11 @@ int flush; } #ifdef GUNZIP state->mode = LENGTH; + /* fallthrough */ case LENGTH: if (state->wrap && state->flags) { NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { strm->msg = (char *)"incorrect length check"; state->mode = BAD; break; @@ -1232,6 +1254,7 @@ int flush; } #endif state->mode = DONE; + /* fallthrough */ case DONE: ret = Z_STREAM_END; goto inf_leave; @@ -1241,6 +1264,7 @@ int flush; case MEM: return Z_MEM_ERROR; case SYNC: + /* fallthrough */ default: return Z_STREAM_ERROR; } @@ -1266,7 +1290,7 @@ int flush; state->total += out; if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); + UPDATE_CHECK(state->check, strm->next_out - out, out); strm->data_type = (int)state->bits + (state->last ? 64 : 0) + (state->mode == TYPE ? 128 : 0) + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); @@ -1402,6 +1426,7 @@ int ZEXPORT inflateSync(strm) z_streamp strm; { unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ unsigned long in, out; /* temporary to save total_in and total_out */ unsigned char buf[4]; /* to restore bit buffer to byte string */ struct inflate_state FAR *state; @@ -1434,9 +1459,15 @@ z_streamp strm; /* return no joy or set up to restart inflate() on a new block */ if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ + else + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; in = strm->total_in; out = strm->total_out; inflateReset(strm); strm->total_in = in; strm->total_out = out; + state->flags = flags; state->mode = TYPE; return Z_OK; } @@ -1532,7 +1563,7 @@ int check; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (check) + if (check && state->wrap) state->wrap |= 4; else state->wrap &= ~4; diff --git a/3rdparty/zlib/inflate.h b/3rdparty/zlib/inflate.h index a46cce6b6d..f127b6b1fa 100644 --- a/3rdparty/zlib/inflate.h +++ b/3rdparty/zlib/inflate.h @@ -1,5 +1,5 @@ /* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2016 Mark Adler + * Copyright (C) 1995-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -86,7 +86,8 @@ struct inflate_state { int wrap; /* bit 0 true for zlib, bit 1 true for gzip, bit 2 true to validate check value */ int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ + int flags; /* gzip header method and flags, 0 if zlib, or + -1 if raw or no header yet */ unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ unsigned long check; /* protected copy of check value */ unsigned long total; /* protected copy of output count */ diff --git a/3rdparty/zlib/inftrees.c b/3rdparty/zlib/inftrees.c index 2ea08fc13e..09462a740b 100644 --- a/3rdparty/zlib/inftrees.c +++ b/3rdparty/zlib/inftrees.c @@ -1,5 +1,5 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2017 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -9,7 +9,7 @@ #define MAXBITS 15 const char inflate_copyright[] = - " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; + " inflate 1.2.12 Copyright 1995-2022 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -62,7 +62,7 @@ unsigned short FAR *work; 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202}; static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, diff --git a/3rdparty/zlib/trees.c b/3rdparty/zlib/trees.c index 50cf4b4571..f73fd99c37 100644 --- a/3rdparty/zlib/trees.c +++ b/3rdparty/zlib/trees.c @@ -1,5 +1,5 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2017 Jean-loup Gailly + * Copyright (C) 1995-2021 Jean-loup Gailly * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -149,7 +149,7 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, local void compress_block OF((deflate_state *s, const ct_data *ltree, const ct_data *dtree)); local int detect_data_type OF((deflate_state *s)); -local unsigned bi_reverse OF((unsigned value, int length)); +local unsigned bi_reverse OF((unsigned code, int len)); local void bi_windup OF((deflate_state *s)); local void bi_flush OF((deflate_state *s)); @@ -416,7 +416,7 @@ local void init_block(s) s->dyn_ltree[END_BLOCK].Freq = 1; s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; + s->sym_next = s->matches = 0; } #define SMALLEST 1 @@ -870,7 +870,8 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) bi_windup(s); /* align on byte boundary */ put_short(s, (ush)stored_len); put_short(s, (ush)~stored_len); - zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + if (stored_len) + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); s->pending += stored_len; #ifdef ZLIB_DEBUG s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; @@ -947,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); + s->sym_next / 3)); if (static_lenb <= opt_lenb) opt_lenb = static_lenb; @@ -1016,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) unsigned dist; /* distance of matched string */ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; + s->sym_buf[s->sym_next++] = dist; + s->sym_buf[s->sym_next++] = dist >> 8; + s->sym_buf[s->sym_next++] = lc; if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; @@ -1032,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ + return (s->sym_next == s->sym_end); } /* =========================================================================== @@ -1068,13 +1047,14 @@ local void compress_block(s, ltree, dtree) { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ + unsigned sx = 0; /* running index in sym_buf */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; + if (s->sym_next != 0) do { + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; if (dist == 0) { send_code(s, lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); @@ -1099,11 +1079,10 @@ local void compress_block(s, ltree, dtree) } } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - "pendingBuf overflow"); + /* Check that the overlay between pending_buf and sym_buf is ok: */ + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); - } while (lx < s->last_lit); + } while (sx < s->sym_next); send_code(s, END_BLOCK, ltree); } @@ -1112,9 +1091,9 @@ local void compress_block(s, ltree, dtree) * Check if the data type is TEXT or BINARY, using the following algorithm: * - TEXT if the two conditions below are satisfied: * a) There are no non-portable control characters belonging to the - * "black list" (0..6, 14..25, 28..31). + * "block list" (0..6, 14..25, 28..31). * b) There is at least one printable character belonging to the - * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). * - BINARY otherwise. * - The following partially-portable control characters form a * "gray list" that is ignored in this detection algorithm: @@ -1124,19 +1103,19 @@ local void compress_block(s, ltree, dtree) local int detect_data_type(s) deflate_state *s; { - /* black_mask is the bit mask of black-listed bytes + /* block_mask is the bit mask of block-listed bytes * set bits 0..6, 14..25, and 28..31 * 0xf3ffc07f = binary 11110011111111111100000001111111 */ - unsigned long black_mask = 0xf3ffc07fUL; + unsigned long block_mask = 0xf3ffc07fUL; int n; - /* Check for non-textual ("black-listed") bytes. */ - for (n = 0; n <= 31; n++, black_mask >>= 1) - if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + /* Check for non-textual ("block-listed") bytes. */ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) return Z_BINARY; - /* Check for textual ("white-listed") bytes. */ + /* Check for textual ("allow-listed") bytes. */ if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 || s->dyn_ltree[13].Freq != 0) return Z_TEXT; @@ -1144,7 +1123,7 @@ local int detect_data_type(s) if (s->dyn_ltree[n].Freq != 0) return Z_TEXT; - /* There are no "black-listed" or "white-listed" bytes: + /* There are no "block-listed" or "allow-listed" bytes: * this stream either is empty or has tolerated ("gray-listed") bytes only. */ return Z_BINARY; diff --git a/3rdparty/zlib/zlib.h b/3rdparty/zlib/zlib.h index f09cdaf1e0..4a98e38bf3 100644 --- a/3rdparty/zlib/zlib.h +++ b/3rdparty/zlib/zlib.h @@ -1,7 +1,7 @@ /* zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.11, January 15th, 2017 + version 1.2.12, March 11th, 2022 - Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -37,11 +37,11 @@ extern "C" { #endif -#define ZLIB_VERSION "1.2.11" -#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VERSION "1.2.12" +#define ZLIB_VERNUM 0x12c0 #define ZLIB_VER_MAJOR 1 #define ZLIB_VER_MINOR 2 -#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_REVISION 12 #define ZLIB_VER_SUBREVISION 0 /* @@ -543,8 +543,7 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, int strategy)); This is another version of deflateInit with more compression options. The - fields next_in, zalloc, zfree and opaque must be initialized before by the - caller. + fields zalloc, zfree and opaque must be initialized before by the caller. The method parameter is the compression method. It must be Z_DEFLATED in this version of the library. @@ -712,11 +711,12 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, used to switch between compression and straight copy of the input data, or to switch to a different kind of input data requiring a different strategy. If the compression approach (which is a function of the level) or the - strategy is changed, and if any input has been consumed in a previous - deflate() call, then the input available so far is compressed with the old - level and strategy using deflate(strm, Z_BLOCK). There are three approaches - for the compression levels 0, 1..3, and 4..9 respectively. The new level - and strategy will take effect at the next call of deflate(). + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does not have enough output space to complete, then the parameter change will not @@ -865,9 +865,11 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, detection, or add 16 to decode only the gzip format (the zlib format will return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see - below), inflate() will not automatically decode concatenated gzip streams. - inflate() will return Z_STREAM_END at the end of the gzip stream. The state - would need to be reset to continue decoding a subsequent gzip stream. + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952). inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the @@ -1302,14 +1304,14 @@ typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ /* ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); - Opens a gzip (.gz) file for reading or writing. The mode parameter is as - in fopen ("rb" or "wb") but can also include a compression level ("wb9") or - a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only - compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' - for fixed code compression as in "wb9F". (See the description of - deflateInit2 for more information about the strategy parameter.) 'T' will - request transparent writing or appending with no compression and not using - the gzip format. + Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. "a" can be used instead of "w" to request that the gzip stream that will be written be appended to the file. "+" will result in an error, since @@ -1339,9 +1341,9 @@ ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); /* - gzdopen associates a gzFile with the file descriptor fd. File descriptors - are obtained from calls like open, dup, creat, pipe or fileno (if the file - has been previously opened with fopen). The mode parameter is as in gzopen. + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. The next call of gzclose on the returned gzFile will also close the file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor @@ -1362,13 +1364,13 @@ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); /* - Set the internal buffer size used by this library's functions. The - default buffer size is 8192 bytes. This function must be called after - gzopen() or gzdopen(), and before any other calls that read or write the - file. The buffer memory allocation is always deferred to the first read or - write. Three times that size in buffer space is allocated. A larger buffer - size of, for example, 64K or 128K bytes will noticeably increase the speed - of decompression (reading). + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). The new buffer size also affects the maximum length for gzprintf(). @@ -1378,9 +1380,9 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); /* - Dynamically update the compression level or strategy. See the description - of deflateInit2 for the meaning of these parameters. Previously provided - data is flushed before the parameter change. + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not opened for writing, Z_ERRNO if there is an error writing the flushed data, @@ -1389,7 +1391,7 @@ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); /* - Reads the given number of uncompressed bytes from the compressed file. If + Read and decompress up to len uncompressed bytes from file into buf. If the input file is not in gzip format, gzread copies the given number of bytes into the buffer directly from the file. @@ -1420,11 +1422,11 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, gzFile file)); /* - Read up to nitems items of size size from file to buf, otherwise operating - as gzread() does. This duplicates the interface of stdio's fread(), with - size_t request and return types. If the library defines size_t, then - z_size_t is identical to size_t. If not, then z_size_t is an unsigned - integer type that can contain a pointer. + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. gzfread() returns the number of full items read of size size, or zero if the end of the file was reached and a full item could not be read, or if @@ -1443,18 +1445,16 @@ ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems, file, reseting and retrying on end-of-file, when size is not 1. */ -ZEXTERN int ZEXPORT gzwrite OF((gzFile file, - voidpc buf, unsigned len)); +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len)); /* - Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of uncompressed bytes written or 0 in case of - error. + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. */ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, z_size_t nitems, gzFile file)); /* - gzfwrite() writes nitems items of size size from buf to file, duplicating + Compress and write nitems items of size size from buf to file, duplicating the interface of stdio's fwrite(), with size_t request and return types. If the library defines size_t, then z_size_t is identical to size_t. If not, then z_size_t is an unsigned integer type that can contain a pointer. @@ -1467,22 +1467,22 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size, ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); /* - Converts, formats, and writes the arguments to the compressed file under - control of the format string, as in fprintf. gzprintf returns the number of + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of uncompressed bytes actually written, or a negative zlib error code in case of error. The number of uncompressed bytes written is limited to 8191, or one less than the buffer size given to gzbuffer(). The caller should assure that this limit is not exceeded. If it is exceeded, then gzprintf() will return an error (0) with nothing written. In this case, there may also be a buffer overflow with unpredictable consequences, which is possible only if - zlib was compiled with the insecure functions sprintf() or vsprintf() + zlib was compiled with the insecure functions sprintf() or vsprintf(), because the secure snprintf() or vsnprintf() functions were not available. This can be determined using zlibCompileFlags(). */ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); /* - Writes the given null-terminated string to the compressed file, excluding + Compress and write the given null-terminated string s to file, excluding the terminating null character. gzputs returns the number of characters written, or -1 in case of error. @@ -1490,11 +1490,12 @@ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); /* - Reads bytes from the compressed file until len-1 characters are read, or a - newline character is read and transferred to buf, or an end-of-file - condition is encountered. If any characters are read or if len == 1, the - string is terminated with a null character. If no characters are read due - to an end-of-file or len < 1, then the buffer is left untouched. + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. gzgets returns buf which is a null-terminated string, or it returns NULL for end-of-file or in case of error. If there was an error, the contents at @@ -1503,13 +1504,13 @@ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); /* - Writes c, converted to an unsigned char, into the compressed file. gzputc + Compress and write c, converted to an unsigned char, into file. gzputc returns the value that was written, or -1 in case of error. */ ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); /* - Reads one byte from the compressed file. gzgetc returns this byte or -1 + Read and decompress one byte from file. gzgetc returns this byte or -1 in case of end of file or error. This is implemented as a macro for speed. As such, it does not do all of the checking the other functions do. I.e. it does not check to see if file is NULL, nor whether the structure file @@ -1518,8 +1519,8 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); /* - Push one character back onto the stream to be read as the first character - on the next read. At least one character of push-back is allowed. + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. gzungetc() returns the character pushed, or -1 on failure. gzungetc() will fail if c is -1, and may fail if a character has been pushed but not read yet. If gzungetc is used immediately after gzopen or gzdopen, at least the @@ -1530,9 +1531,9 @@ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); /* - Flushes all pending output into the compressed file. The parameter flush - is as in the deflate() function. The return value is the zlib error number - (see function gzerror below). gzflush is only permitted when writing. + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. If the flush parameter is Z_FINISH, the remaining data is written and the gzip stream is completed in the output. If gzwrite() is called again, a new @@ -1547,8 +1548,8 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, z_off_t offset, int whence)); - Sets the starting position for the next gzread or gzwrite on the given - compressed file. The offset represents a number of bytes in the + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the uncompressed data stream. The whence parameter is defined as in lseek(2); the value SEEK_END is not supported. @@ -1565,18 +1566,18 @@ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); /* - Rewinds the given file. This function is supported only for reading. + Rewind file. This function is supported only for reading. - gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). */ /* ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); - Returns the starting position for the next gzread or gzwrite on the given - compressed file. This position represents a number of bytes in the - uncompressed data stream, and is zero when starting, even if appending or - reading a gzip stream from the middle of a file using gzdopen(). + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) */ @@ -1584,22 +1585,22 @@ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); /* ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); - Returns the current offset in the file being read or written. This offset - includes the count of bytes that precede the gzip stream, for example when - appending or when using gzdopen() for reading. When reading, the offset - does not include as yet unused buffered input. This information can be used - for a progress indicator. On error, gzoffset() returns -1. + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. */ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); /* - Returns true (1) if the end-of-file indicator has been set while reading, - false (0) otherwise. Note that the end-of-file indicator is set only if the - read tried to go past the end of the input, but came up short. Therefore, - just like feof(), gzeof() may return false even if there is no more data to - read, in the event that the last read request was for the exact number of - bytes remaining in the input file. This will happen if the input file size - is an exact multiple of the buffer size. + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. If gzeof() returns true, then the read functions will return no more data, unless the end-of-file indicator is reset by gzclearerr() and the input file @@ -1608,7 +1609,7 @@ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); /* - Returns true (1) if file is being copied directly while reading, or false + Return true (1) if file is being copied directly while reading, or false (0) if file is a gzip stream being decompressed. If the input file is empty, gzdirect() will return true, since the input @@ -1629,8 +1630,8 @@ ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); ZEXTERN int ZEXPORT gzclose OF((gzFile file)); /* - Flushes all pending output if necessary, closes the compressed file and - deallocates the (de)compression state. Note that once file is closed, you + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. Note that once file is closed, you cannot call gzerror with file, since its structures have been deallocated. gzclose must not be called more than once on the same file, just as free must not be called more than once on the same allocation. @@ -1654,10 +1655,10 @@ ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); /* - Returns the error message for the last error which occurred on the given - compressed file. errnum is set to zlib error number. If an error occurred - in the file system and not in the compression library, errnum is set to - Z_ERRNO and the application may consult errno to get the exact error code. + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. The application must not modify the returned string. Future calls to this function may invalidate the previously returned string. If file is @@ -1670,7 +1671,7 @@ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); /* - Clears the error and end-of-file flags for file. This is analogous to the + Clear the error and end-of-file flags for file. This is analogous to the clearerr() function in stdio. This is useful for continuing to read a gzip file that is being written concurrently. */ @@ -1688,8 +1689,9 @@ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); /* Update a running Adler-32 checksum with the bytes buf[0..len-1] and - return the updated checksum. If buf is Z_NULL, this function returns the - required initial value for the checksum. + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed much faster. @@ -1722,12 +1724,13 @@ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, negative, the result has no meaning or utility. */ -ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); /* Update a running CRC-32 with the bytes buf[0..len-1] and return the - updated CRC-32. If buf is Z_NULL, this function returns the required - initial value for the crc. Pre- and post-conditioning (one's complement) is - performed within this function so it shouldn't be done by the application. + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. Usage example: @@ -1739,7 +1742,7 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); if (crc != original_crc) error(); */ -ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, +ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf, z_size_t len)); /* Same as crc32(), but with a size_t length. @@ -1755,6 +1758,20 @@ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); len2. */ +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t len2)); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op)); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + is generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once. +*/ + /* various hacks, don't look :) */ @@ -1842,6 +1859,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off64_t)); #endif #if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) @@ -1852,6 +1870,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ # define z_gzoffset z_gzoffset64 # define z_adler32_combine z_adler32_combine64 # define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 # else # define gzopen gzopen64 # define gzseek gzseek64 @@ -1859,6 +1878,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ # define gzoffset gzoffset64 # define adler32_combine adler32_combine64 # define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 # endif # ifndef Z_LARGE64 ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); @@ -1867,6 +1887,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t)); # endif #else ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); @@ -1875,12 +1896,14 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t)); #endif #else /* Z_SOLO */ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t)); #endif /* !Z_SOLO */ @@ -1893,7 +1916,7 @@ ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int)); ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp)); ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); -#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO) +#if defined(_WIN32) && !defined(Z_SOLO) ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, const char *mode)); #endif diff --git a/3rdparty/zlib/zutil.c b/3rdparty/zlib/zutil.c index 7271667fd7..dcab28a0d5 100644 --- a/3rdparty/zlib/zutil.c +++ b/3rdparty/zlib/zutil.c @@ -137,7 +137,7 @@ const char * ZEXPORT zError(err) } #if defined(_WIN32_WCE) && _WIN32_WCE < 0x800 - /* The Microsoft C Run-Time Library for Windows CE doesn't have + /* The older Microsoft C Run-Time Library for Windows CE doesn't have * errno. We define it as a global variable to simplify porting. * Its value is always 0 and should not be used. */ diff --git a/3rdparty/zlib/zutil.h b/3rdparty/zlib/zutil.h index 4774fc3235..d9a20ae1bf 100644 --- a/3rdparty/zlib/zutil.h +++ b/3rdparty/zlib/zutil.h @@ -1,5 +1,5 @@ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -29,10 +29,6 @@ # include #endif -#ifdef Z_SOLO - typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ -#endif - #ifndef local # define local static #endif @@ -46,6 +42,17 @@ typedef unsigned short ush; typedef ush FAR ushf; typedef unsigned long ulg; +#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC) +# include +# if (ULONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long +# elif (ULLONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long long +# elif (UINT_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned +# endif +#endif + extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ @@ -169,13 +176,7 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX # if defined(_WIN32_WCE) -# if _WIN32_WCE < 0x800 -# define fdopen(fd,mode) NULL /* No fdopen() */ -# ifndef _PTRDIFF_T_DEFINED - typedef int ptrdiff_t; -# define _PTRDIFF_T_DEFINED -# endif -# endif +# define fdopen(fd,mode) NULL /* No fdopen() */ # else # define fdopen(fd,type) _fdopen(fd,type) # endif diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ec7b65030..0c65cf08d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,6 +454,9 @@ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF VISIBLE_IF TRUE VERIFY HAVE_ONNX) +OCV_OPTION(WITH_TIMVX "Include Tim-VX support" OFF + VISIBLE_IF TRUE + VERIFY HAVE_TIMVX) # OpenCV build components # =================================================== @@ -740,6 +743,9 @@ include(cmake/OpenCVFindProtobuf.cmake) if(WITH_TENGINE) include(cmake/OpenCVFindTengine.cmake) endif() +if(WITH_TIMVX) + include(cmake/OpenCVFindTIMVX.cmake) +endif() # ---------------------------------------------------------------------------- # Detect other 3rd-party libraries/tools @@ -1656,6 +1662,16 @@ if(WITH_WEBNN OR HAVE_WEBNN) endif() endif() +if(WITH_TIMVX) + status("") + status(" Tim-VX:" HAVE_TIMVX THEN "YES" ELSE "NO") + if(HAVE_TIMVX) + status(" Include path" TIMVX_INCLUDE_DIR THEN "${TIMVX_INCLUDE_DIR}" ELSE "NO") + status(" Link libraries:" TIMVX_LIBRARY THEN "${TIMVX_LIBRARY}" ELSE "NO") + status(" VIVANTE SDK path" VIVANTE_SDK_DIR THEN "${VIVANTE_SDK_DIR}" ELSE "NO") + endif() +endif() + if(WITH_OPENCL OR HAVE_OPENCL) ocv_build_features_string(opencl_features IF HAVE_OPENCL_SVM THEN "SVM" diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index bcb8a3e203..4f5c353980 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -314,6 +314,10 @@ if(MSVC) set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS") set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS") endif() + + if(AARCH64 AND NOT MSVC_VERSION LESS 1930) + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES") + endif() endif() if(PROJECT_NAME STREQUAL "OpenCV") diff --git a/cmake/OpenCVDownload.cmake b/cmake/OpenCVDownload.cmake index a427a41227..3e46515537 100644 --- a/cmake/OpenCVDownload.cmake +++ b/cmake/OpenCVDownload.cmake @@ -37,6 +37,50 @@ file(WRITE "${OPENCV_DOWNLOAD_LOG}" "#use_cache \"${OPENCV_DOWNLOAD_PATH}\"\n") file(REMOVE "${OPENCV_DOWNLOAD_WITH_CURL}") file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}") +ocv_check_environment_variables(OPENCV_DOWNLOAD_MIRROR_ID) + +function(ocv_init_download_mirror) + if(NOT DEFINED OPENCV_DOWNLOAD_MIRROR_ID) + # Run `git remote get-url origin` to get remote source + execute_process( + COMMAND + git remote get-url origin + WORKING_DIRECTORY + ${CMAKE_SOURCE_DIR} + RESULT_VARIABLE + RESULT_STATUS + OUTPUT_VARIABLE + OCV_GIT_ORIGIN_URL_OUTPUT + ERROR_QUIET + ) + # if non-git, OCV_GIT_ORIGIN_URL_OUTPUT is empty + if(NOT OCV_GIT_ORIGIN_URL_OUTPUT) + message(STATUS "ocv_init_download: OpenCV source tree is not fetched as git repository. 3rdparty resources will be downloaded from github.com by default.") + return() + else() + # Check if git origin is github.com + string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "github.com" _found_github) + if(NOT ${_found_github} EQUAL -1) + set(OPENCV_DOWNLOAD_MIRROR_ID "github" CACHE STRING "") + endif() + # Check if git origin is gitcode.net + string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "gitcode.net" _found_gitcode) + if(NOT ${_found_gitcode} EQUAL -1) + set(OPENCV_DOWNLOAD_MIRROR_ID "gitcode" CACHE STRING "") + endif() + endif() + endif() + + if(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "gitcode" OR OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "custom") + message(STATUS "ocv_init_download: Using ${OPENCV_DOWNLOAD_MIRROR_ID}-hosted mirror to download 3rdparty components.") + ocv_cmake_hook_append(OPENCV_DOWNLOAD_PRE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/mirrors/${OPENCV_DOWNLOAD_MIRROR_ID}.cmake") + elseif(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "github") + return() + else() + message(STATUS "ocv_init_download: Unable to recognize git server of OpenCV source code. Using github.com to download 3rdparty components.") + endif() +endfunction() + function(ocv_download) cmake_parse_arguments(DL "UNPACK;RELATIVE_URL" "FILENAME;HASH;DESTINATION_DIR;ID;STATUS" "URL" ${ARGN}) @@ -67,6 +111,8 @@ function(ocv_download) set(${DL_STATUS} TRUE PARENT_SCOPE) endif() + ocv_cmake_hook(OPENCV_DOWNLOAD_PRE) + # Check CMake cache for already processed tasks string(FIND "${DL_DESTINATION_DIR}" "${CMAKE_BINARY_DIR}" DL_BINARY_PATH_POS) if(DL_BINARY_PATH_POS EQUAL 0) @@ -115,7 +161,7 @@ function(ocv_download) if(DL_ID) set(__msg_prefix "${DL_ID}: ") endif() - message(STATUS "${__msg_prefix}Download: ${DL_FILENAME}") + message(STATUS "${__msg_prefix}Downloading ${DL_FILENAME} from ${DL_URL}") # Copy mode: check if copy destination exists and is correct if(NOT DL_UNPACK) @@ -252,3 +298,8 @@ ${OPENCV_DOWNLOAD_LOG} set(${OCV_DOWNLOAD_HASH_NAME} "${DL_HASH}" CACHE INTERNAL "") endif() endfunction() + +# ---------------------------------------------------------------------------- +# Initialize download in case mirror is used +# ---------------------------------------------------------------------------- +ocv_init_download_mirror() diff --git a/cmake/OpenCVFindTIMVX.cmake b/cmake/OpenCVFindTIMVX.cmake new file mode 100644 index 0000000000..339f726bd9 --- /dev/null +++ b/cmake/OpenCVFindTIMVX.cmake @@ -0,0 +1,69 @@ +set(TIMVX_INSTALL_DIR "" CACHE PATH "Path to libtim-vx installation") +set(VIVANTE_SDK_DIR "" CACHE PATH "Path to VIVANTE SDK needed by TIM-VX.") +set(VIVANTE_SDK_LIB_CANDIDATES "OpenVX;VSC;GAL;ArchModelSw;NNArchPerf" CACHE STRING "VIVANTE SDK library candidates") + +# Ensure VIVANTE SDK library candidates are present in given search path +function(find_vivante_sdk_libs _viv_notfound _viv_search_path) + foreach(one ${VIVANTE_SDK_LIB_CANDIDATES}) + #NO_DEFAULT_PATH is used to ensure VIVANTE SDK libs are from one only source + find_library(VIV_${one}_LIB ${one} PATHS "${_viv_search_path}/lib" NO_DEFAULT_PATH) + if(NOT VIV_${one}_LIB) + list(APPEND _viv_notfound_list ${one}) + endif() + endforeach() + set(${_viv_notfound} ${_viv_notfound_list} PARENT_SCOPE) +endfunction() +# Default value for VIVANTE_SDK_DIR: /usr +if(NOT VIVANTE_SDK_DIR) + set(VIVANTE_SDK_DIR "/usr") +endif() +# Environment variable VIVANTE_SDK_DIR overrides the one in this script +if(DEFINED ENV{VIVANTE_SDK_DIR}) + set(VIVANTE_SDK_DIR $ENV{VIVANTE_SDK_DIR}) + message(STATUS "TIM-VX: Load VIVANTE_SDK_DIR from system environment: ${VIVANTE_SDK_DIR}") +endif() + + +# Compile with pre-installed TIM-VX; Or compile together with TIM-VX from source +if(TIMVX_INSTALL_DIR AND NOT BUILD_TIMVX) + message(STATUS "TIM-VX: Use binaries at ${TIMVX_INSTALL_DIR}") + set(BUILD_TIMVX OFF) + + set(TIMVX_INC_DIR "${TIMVX_INSTALL_DIR}/include" CACHE INTERNAL "TIM-VX include directory") + find_library(TIMVX_LIB "tim-vx" PATHS "${TIMVX_INSTALL_DIR}/lib") + if(TIMVX_LIB) + set(TIMVX_FOUND ON) + else() + set(TIMVX_FOUND OFF) + endif() + + # Verify if requested VIVANTE SDK libraries are all found + find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR}) + if(missing) + message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. Turning off TIMVX_VIV_FOUND") + set(TIMVX_VIV_FOUND OFF) + else() + message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.") + set(TIMVX_VIV_FOUND ON) + endif() +else() + message(STATUS "TIM-VX: Build from source") + include("${OpenCV_SOURCE_DIR}/3rdparty/libtim-vx/tim-vx.cmake") +endif() + +if(TIMVX_FOUND AND TIMVX_VIV_FOUND) + set(HAVE_TIMVX 1) + + message(STATUS "TIM-VX: Found TIM-VX includes: ${TIMVX_INC_DIR}") + message(STATUS "TIM-VX: Found TIM-VX library: ${TIMVX_LIB}") + set(TIMVX_LIBRARY ${TIMVX_LIB}) + set(TIMVX_INCLUDE_DIR ${TIMVX_INC_DIR}) + + message(STATUS "TIM-VX: Found VIVANTE SDK libraries: ${VIVANTE_SDK_DIR}/lib") + link_directories(${VIVANTE_SDK_DIR}/lib) +endif() + +MARK_AS_ADVANCED( + TIMVX_INC_DIR + TIMVX_LIB +) diff --git a/cmake/checks/cpu_neon.cpp b/cmake/checks/cpu_neon.cpp index c309e85049..bb103ec366 100644 --- a/cmake/checks/cpu_neon.cpp +++ b/cmake/checks/cpu_neon.cpp @@ -1,6 +1,7 @@ #include #if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) +# define _ARM64_DISTINCT_NEON_TYPES # include # include # define CV_NEON 1 diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake new file mode 100644 index 0000000000..3cdf700e19 --- /dev/null +++ b/cmake/mirrors/custom.cmake @@ -0,0 +1,91 @@ +# Gitlab-style mirror +# CMake scripts look for opencv/opencv_3rdparty, +# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade +# from OPENCV_DOWNLOAD_MIRROR +ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") + +###### +# Download via commit id +###### +# Tengine +ocv_update(TENGINE_PKG_MD5_CUSTOM "") +ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e +# NVIDIA_OPTICAL_FLOW +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "") +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) +# TIM-VX +ocv_update(TIM-VX_PKG_MD5_GITCODE "") +ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5) + +###### +# Download from release page +##### +# TBB +ocv_update(TBB_RELEASE_CUSTOM "") +ocv_update(TBB_PKG_NAME_CUSTOM "") +ocv_update(TBB_PKG_MD5_CUSTOM "") +ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 +# ADE +ocv_update(ADE_RELEASE_CUSTOM "") +ocv_update(ADE_PKG_NAME_CUSTOM "") +ocv_update(ADE_PKG_MD5_CUSTOM "") +ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f + +macro(ocv_download_url_custom_usercontent OWNER) + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 5 __COMMIT_ID) + list(GET DL_URL_split 6 __PKG_NAME) + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/") +endmacro() +macro(ocv_download_url_custom_archive_commit_id) + if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.") + elseif(${DL_ID}_PKG_MD5_ORIGINAL STREQUAL "${DL_HASH}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/") + set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}") + else() + message(WARNING "No information about mirrors for downloading ${DL_FILENAME} from URL='${DL_URL}' and MD5=${DL_HASH}.") + endif() +endmacro() +macro(ocv_download_url_custom_archive_release) + if("m${${DL_ID}_RELEASE_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_RELEASE_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + if("m${${DL_ID}_PKG_NAME_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_NAME_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m") + message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.") + return() + endif() + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_CUSTOM}/${__REPO_NAME}-") + set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}") +endmacro() + +if("m${OPENCV_DOWNLOAD_MIRROR_URL}" STREQUAL "m") + message(WARNING "ocv_download: specify OPENCV_DOWNLOAD_MIRROR_URL to use custom mirror.") +else() + if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg")) + ocv_download_url_custom_usercontent(opencv) + elseif(DL_ID STREQUAL "wechat_qrcode") + ocv_download_url_gitcode_usercontent(WeChatCV) + elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) + ocv_download_url_custom_archive_commit_id() + elseif(DL_ID STREQUAL "TBB") + ocv_download_url_custom_archive_release() + set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_CUSTOM}" PARENT_SCOPE) + elseif(DL_ID STREQUAL "ADE") + ocv_download_url_custom_archive_release() + set(ade_subdir "${ADE_PKG_NAME_CUSTOM}" PARENT_SCOPE) + else() + message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror ${OPENCV_DOWNLOAD_MIRROR_URL}. Use original source instead.") + endif() +endif() diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake new file mode 100644 index 0000000000..abd7a29be4 --- /dev/null +++ b/cmake/mirrors/gitcode.cmake @@ -0,0 +1,86 @@ +###### +# Download via commit id +###### +# Tengine +ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) +ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e +# NVIDIA_OPTICAL_FLOW +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47) +ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) +# TIM-VX +ocv_update(TIM-VX_PKG_MD5_GITCODE 3f2a548b40b170668aaa60d4f60ba40b) +ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5) + +###### +# Download from release page +##### +# TBB +ocv_update(TBB_RELEASE_GITCODE "v2020.2") +ocv_update(TBB_PKG_NAME_GITCODE "tbb-${TBB_RELEASE_GITCODE}") +ocv_update(TBB_PKG_MD5_GITCODE 4eeafdf16a90cb66e39a31c8d6c6804e) +ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2 +# ADE +ocv_update(ADE_RELEASE_GITCODE "v0.1.1f") +ocv_update(ADE_PKG_NAME_GITCODE "ade-${ADE_RELEASE_GITCODE}") +ocv_update(ADE_PKG_MD5_GITCODE c12909e0ccfa93138c820ba91ff37b3c) +ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f + +# +# Replace download links for packages in opencv/opencv_3rdparty: +# 1. Extract repo owner and repo name from DL_URL. +# 2. Put repo owner and repo name into the placeholders of new DL_URL. +# +macro(ocv_download_url_gitcode_usercontent OWNER) + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 5 __COMMIT_ID) + list(GET DL_URL_split 6 __PKG_NAME) + set(DL_URL "https://gitcode.net/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/") + if(__PKG_NAME) + set(DL_URL "${DL_URL}${__PKG_NAME}/") + endif() +endmacro() +# +# Replace download links and checksums for archives/releases in other repositories: +# 1. Check if versions matched. If not matched, download from github instead. +# 2. Extract repo owner and repo name from DL_URL. +# 3. Put repo owner and repo name into the placeholders of new DL_URL. +# 4. Replace DL_HASH with the one downloaded from gitcode.net. +# +macro(ocv_download_url_gitcode_archive_commit_id) + if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://gitcode.net/mirrors/${__OWNER}/${__REPO_NAME}/-/archive/") + set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}") + else() + message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.") + endif() +endmacro() +macro(ocv_download_url_gitcode_archive_release) + if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}") + string(REPLACE "/" ";" DL_URL_split ${DL_URL}) + list(GET DL_URL_split 3 __OWNER) + list(GET DL_URL_split 4 __REPO_NAME) + set(DL_URL "https://gitcode.net/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_GITCODE}/${__REPO_NAME}-") + set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}") + else() + message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.") + endif() +endmacro() + +if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg")) + ocv_download_url_gitcode_usercontent(opencv) +elseif(DL_ID STREQUAL "wechat_qrcode") + ocv_download_url_gitcode_usercontent(mirrors/WeChatCV) +elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) + ocv_download_url_gitcode_archive_commit_id() +elseif(DL_ID STREQUAL "TBB") + ocv_download_url_gitcode_archive_release() + set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_GITCODE}" PARENT_SCOPE) +elseif(DL_ID STREQUAL "ADE") + ocv_download_url_gitcode_archive_release() + set(ade_subdir "${ADE_PKG_NAME_GITCODE}" PARENT_SCOPE) +else() + message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror gitcode.net. Use original source instead.") +endif() diff --git a/doc/js_tutorials/js_assets/webnn-electron/package.json b/doc/js_tutorials/js_assets/webnn-electron/package.json index e6a258ee40..9c3c817db7 100644 --- a/doc/js_tutorials/js_assets/webnn-electron/package.json +++ b/doc/js_tutorials/js_assets/webnn-electron/package.json @@ -1,7 +1,7 @@ { "name": "image_classification", "version": "0.0.1", - "description": "An Electon.js example of image_classification using webnn-native", + "description": "An Electron.js example of image_classification using webnn-native", "main": "main.js", "author": "WebNN-native Authors", "license": "Apache-2.0", diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown index 9927477443..2a7a111d8a 100644 --- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown +++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown @@ -97,10 +97,10 @@ Building OpenCV.js from Source @endcode @note - The loader is implemented as a js file in the path `/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the broswer and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application. + The loader is implemented as a js file in the path `/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the browser and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application. Example Code: - @code{.javascipt} + @code{.javascript} // Set paths configuration let pathsConfig = { wasm: "../../build_wasm/opencv.js", @@ -173,7 +173,7 @@ This snippet and the following require [Node.js](https://nodejs.org) to be insta ### Headless with Puppeteer -Alternatively tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme) which is a version of Google Chrome that runs in the terminal (useful for Continuos integration like travis CI, etc) +Alternatively tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme) which is a version of Google Chrome that runs in the terminal (useful for Continuous integration like travis CI, etc) @code{.sh} cd build_js/bin @@ -229,7 +229,7 @@ node tests.js The simd optimization is experimental as wasm simd is still in development. @note - Now only emscripten LLVM upstream backend supports wasm simd, refering to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first: + Now only emscripten LLVM upstream backend supports wasm simd, referring to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first: @code{.bash} ./emsdk update ./emsdk install latest-upstream diff --git a/doc/tutorials/calib3d/usac.markdown b/doc/tutorials/calib3d/usac.markdown index 46dba4b79e..300add042f 100644 --- a/doc/tutorials/calib3d/usac.markdown +++ b/doc/tutorials/calib3d/usac.markdown @@ -244,9 +244,9 @@ Samples: There are three new sample files in opencv/samples directory. 1. `epipolar_lines.cpp` – input arguments of `main` function are two - pathes to images. Then correspondences are found using + paths to images. Then correspondences are found using SIFT detector. Fundamental matrix is found using RANSAC from - tentaive correspondences and epipolar lines are plot. + tentative correspondences and epipolar lines are plot. 2. `essential_mat_reconstr.cpp` – input arguments are path to data file containing image names and single intrinsic matrix and directory diff --git a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown index c9475c43b9..ee4db010a1 100644 --- a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown +++ b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown @@ -92,7 +92,7 @@ We then fill value to the corresponding pixel in the dst image. ### Parallel implementation -When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and parallely perform convolution on each, by exploiting the multi-core architecture of modern processor. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us. +When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and parallelly perform convolution on each, by exploiting the multi-core architecture of modern processor. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us. @note Although values of a pixel in a particular stripe may depend on pixel values outside the stripe, these are only read only operations and hence will not cause undefined behaviour. diff --git a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown index bfbcff4cf8..4d763b918f 100644 --- a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown +++ b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown @@ -70,7 +70,7 @@ Sometimes networks built using blocked structure that means some layer are identical or quite similar. If you want to apply the same scheduling for different layers accurate to tiling or vectorization factors, define scheduling patterns in section `patterns` at the beginning of scheduling file. -Also, your patters may use some parametric variables. +Also, your patterns may use some parametric variables. @code # At the beginning of the file patterns: diff --git a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown index 0e34f927f0..5db4623e7e 100644 --- a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown +++ b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown @@ -29,8 +29,8 @@ Before recognition, you should `setVocabulary` and `setDecodeType`. - "CTC-prefix-beam-search", the output of the text recognition model should be a probability matrix same with "CTC-greedy". - The algorithm is proposed at Hannun's [paper](https://arxiv.org/abs/1408.2873). - `setDecodeOptsCTCPrefixBeamSearch` could be used to control the beam size in search step. - - To futher optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterate the whole vocbulary - but only the number of `vocPruneSize` tokens with top probabilty. + - To further optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterate the whole vocbulary + but only the number of `vocPruneSize` tokens with top probability. @ref cv::dnn::TextRecognitionModel::recognize() is the main function for text recognition. - The input image should be a cropped text image or an image with `roiRects` diff --git a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown index 60829360fe..64b68e644c 100644 --- a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown +++ b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown @@ -142,7 +142,7 @@ being a Graph API, doesn't force its users to do that. However, a graph is still built implicitly when a cv::GComputation object is defined. It may be useful to inspect how the resulting graph looks like to check if it is generated correctly and if it really -represents our alrogithm. It is also useful to learn the structure of +represents our algorithm. It is also useful to learn the structure of the graph to see if it has any redundancies. G-API allows to dump generated graphs to `.dot` files which then diff --git a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown index 6f8b03bb61..27916b4176 100644 --- a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown +++ b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown @@ -241,7 +241,7 @@ pipeline is compiled for streaming: cv::GComputation::compileStreaming() triggers a special video-oriented form of graph compilation where G-API is trying to optimize throughput. Result of this compilation is an object of special type -cv::GStreamingCompiled -- in constract to a traditional callable +cv::GStreamingCompiled -- in contrast to a traditional callable cv::GCompiled, these objects are closer to media players in their semantics. diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown index b4581f862d..4d15ed5a88 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown @@ -79,7 +79,7 @@ The main function is rather simple, as follows from the comments we do the follo In general callback functions are used to react to some kind of signal, in our case it's trackbar's state change. Explicit one-time call of `thresh_callback` is necessary to display - the "Contours" window simultaniously with the "Source" window. + the "Contours" window simultaneously with the "Source" window. @add_toggle_cpp @snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp trackbar diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown index 5acdbc41ed..d37721a188 100644 --- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown +++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown @@ -240,7 +240,7 @@ taken: Hello OpenCV Sample ------------------- -Here are basic steps to guide you trough the process of creating a simple OpenCV-centric +Here are basic steps to guide you through the process of creating a simple OpenCV-centric application. It will be capable of accessing camera output, processing it and displaying the result. -# Open Eclipse IDE, create a new clean workspace, create a new Android project diff --git a/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown index 9d64469920..b0b8d404a0 100644 --- a/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown +++ b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown @@ -20,7 +20,7 @@ This pretty-printer can show element type, `is_continuous`, `is_submatrix` flags # Installation {#tutorial_linux_gdb_pretty_printer_installation} -Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convinient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path. +Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convenient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path. In order to check version of python bundled with your gdb, use the following commands from the gdb shell: @@ -34,5 +34,5 @@ If the version of python 3 installed in your system doesn't match the version in # Usage {#tutorial_linux_gdb_pretty_printer_usage} -The fields in a debugger prefixed with `view_` are pseudo-fields added for convinience, the rest are left as is. -If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controlls everything matrix display-related. +The fields in a debugger prefixed with `view_` are pseudo-fields added for convenience, the rest are left as is. +If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controls everything matrix display-related. diff --git a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown index f01aa6e4f8..57f34e8e4c 100644 --- a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown +++ b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown @@ -22,7 +22,7 @@ Introduction In *OpenCV* all the image processing operations are usually carried out on the *Mat* structure. In iOS however, to render an image on screen it have to be an instance of the *UIImage* class. To convert an *OpenCV Mat* to an *UIImage* we use the *Core Graphics* framework available in iOS. Below -is the code needed to covert back and forth between Mat's and UIImage's. +is the code needed to convert back and forth between Mat's and UIImage's. @code{.m} - (cv::Mat)cvMatFromUIImage:(UIImage *)image { diff --git a/modules/3d/test/test_undistort_points.cpp b/modules/3d/test/test_undistort_points.cpp index 8765e2c5eb..1ac18dedba 100644 --- a/modules/3d/test/test_undistort_points.cpp +++ b/modules/3d/test/test_undistort_points.cpp @@ -1,34 +1,24 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. + +#include // EXPECT_MAT_NEAR #include "test_precomp.hpp" namespace opencv_test { namespace { -class CV_UndistortTest : public cvtest::BaseTest +class UndistortPointsTest : public ::testing::Test { -public: - CV_UndistortTest(); - ~CV_UndistortTest(); protected: - void run(int); -private: void generate3DPointCloud(vector& points, Point3f pmin = Point3f(-1, -1, 5), Point3f pmax = Point3f(1, 1, 10)); void generateCameraMatrix(Mat& cameraMatrix); void generateDistCoeffs(Mat& distCoeffs, int count); - double thresh; - RNG rng; + double thresh = 1.0e-2; }; -CV_UndistortTest::CV_UndistortTest() -{ - thresh = 1.0e-2; -} -CV_UndistortTest::~CV_UndistortTest() {} - -void CV_UndistortTest::generate3DPointCloud(vector& points, Point3f pmin, Point3f pmax) +void UndistortPointsTest::generate3DPointCloud(vector& points, Point3f pmin, Point3f pmax) { RNG rng_Point = cv::theRNG(); // fix the seed to use "fixed" input 3D points for (size_t i = 0; i < points.size(); i++) @@ -39,31 +29,35 @@ void CV_UndistortTest::generate3DPointCloud(vector& points, Point3f pmi points[i] = Point3f(_x, _y, _z); } } -void CV_UndistortTest::generateCameraMatrix(Mat& cameraMatrix) + +void UndistortPointsTest::generateCameraMatrix(Mat& cameraMatrix) { const double fcMinVal = 1e-3; const double fcMaxVal = 100; cameraMatrix.create(3, 3, CV_64FC1); cameraMatrix.setTo(Scalar(0)); - cameraMatrix.at(0,0) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(1,1) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(0,2) = rng.uniform(fcMinVal, fcMaxVal); - cameraMatrix.at(1,2) = rng.uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(0,0) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(1,1) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(0,2) = theRNG().uniform(fcMinVal, fcMaxVal); + cameraMatrix.at(1,2) = theRNG().uniform(fcMinVal, fcMaxVal); cameraMatrix.at(2,2) = 1; } -void CV_UndistortTest::generateDistCoeffs(Mat& distCoeffs, int count) + +void UndistortPointsTest::generateDistCoeffs(Mat& distCoeffs, int count) { distCoeffs = Mat::zeros(count, 1, CV_64FC1); for (int i = 0; i < count; i++) - distCoeffs.at(i,0) = rng.uniform(0.0, 1.0e-3); + distCoeffs.at(i,0) = theRNG().uniform(-0.1, 0.1); } -void CV_UndistortTest::run(int /* start_from */) +TEST_F(UndistortPointsTest, accuracy) { Mat intrinsics, distCoeffs; generateCameraMatrix(intrinsics); + vector points(500); generate3DPointCloud(points); + vector projectedPoints; projectedPoints.resize(points.size()); @@ -71,10 +65,15 @@ void CV_UndistortTest::run(int /* start_from */) for (int idx = 0; idx < 3; idx++) { generateDistCoeffs(distCoeffs, modelMembersCount[idx]); - projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, distCoeffs, projectedPoints); + + projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), + Mat::zeros(3,1,CV_64FC1), intrinsics, + distCoeffs, projectedPoints); vector realUndistortedPoints; - projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, Mat::zeros(4,1,CV_64FC1), realUndistortedPoints); + projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), + Mat::zeros(3,1,CV_64FC1), intrinsics, + Mat::zeros(4,1,CV_64FC1), realUndistortedPoints); Mat undistortedPoints; undistortPoints(Mat(projectedPoints), undistortedPoints, intrinsics, distCoeffs); @@ -82,44 +81,43 @@ void CV_UndistortTest::run(int /* start_from */) Mat p; perspectiveTransform(undistortedPoints, p, intrinsics); undistortedPoints = p; - double diff = cvtest::norm(Mat(realUndistortedPoints), undistortedPoints, NORM_L2); - if (diff > thresh) - { - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } - ts->set_failed_test_info(cvtest::TS::OK); + + EXPECT_MAT_NEAR(realUndistortedPoints, undistortedPoints.t(), thresh); } } -TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); } - -TEST(Calib3d_Undistort, stop_criteria) +TEST_F(UndistortPointsTest, stop_criteria) { Mat cameraMatrix = (Mat_(3,3,CV_64F) << 857.48296979, 0, 968.06224829, 0, 876.71824265, 556.37145899, 0, 0, 1); Mat distCoeffs = (Mat_(5,1,CV_64F) << -2.57614020e-01, 8.77086999e-02, -2.56970803e-04, -5.93390389e-04, -1.52194091e-02); - RNG rng(2); - Point2d pt_distorted(rng.uniform(0.0, 1920.0), rng.uniform(0.0, 1080.0)); + + Point2d pt_distorted(theRNG().uniform(0.0, 1920.0), theRNG().uniform(0.0, 1080.0)); + std::vector pt_distorted_vec; pt_distorted_vec.push_back(pt_distorted); + const double maxError = 1e-6; TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 100, maxError); + std::vector pt_undist_vec; undistortPoints(pt_distorted_vec, pt_undist_vec, cameraMatrix, distCoeffs, noArray(), noArray(), criteria); - std::vector pt_redistorted_vec; std::vector pt_undist_vec_homogeneous; - pt_undist_vec_homogeneous.push_back( Point3d(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0) ); - projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F), Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec); + pt_undist_vec_homogeneous.emplace_back(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0 ); + + std::vector pt_redistorted_vec; + projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F), + Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec); + const double obtainedError = sqrt( pow(pt_distorted.x - pt_redistorted_vec[0].x, 2) + pow(pt_distorted.y - pt_redistorted_vec[0].y, 2) ); ASSERT_LE(obtainedError, maxError); } -TEST(undistortPoints, regression_14583) +TEST_F(UndistortPointsTest, regression_14583) { const int col = 720; // const int row = 540; diff --git a/modules/calib/include/opencv2/calib.hpp b/modules/calib/include/opencv2/calib.hpp index 00c5a19d9d..2b4d760cc1 100644 --- a/modules/calib/include/opencv2/calib.hpp +++ b/modules/calib/include/opencv2/calib.hpp @@ -1469,10 +1469,13 @@ number of points in the view. @param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3 1-channel or 1x1 3-channel @param P New camera intrinsic matrix (3x3) or new projection matrix (3x4) +@param criteria Termination criteria @param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\ . */ CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted, - InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray()); + InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray(), + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8)); + /** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero distortion is used, if R or P is empty identity matrixes are used. diff --git a/modules/calib/perf/perf_undistort.cpp b/modules/calib/perf/perf_undistort.cpp new file mode 100644 index 0000000000..ed248928eb --- /dev/null +++ b/modules/calib/perf/perf_undistort.cpp @@ -0,0 +1,44 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +using PerfIntType = perf::TestBaseWithParam>; +PERF_TEST_P(PerfIntType, fisheye_undistortPoints, + (testing::Values(1e2, 1e3, 1e4))) +{ + const cv::Size imageSize(1280, 800); + + /* Set camera matrix */ + const cv::Matx33d K(558.478087865323, 0, 620.458515360843, + 0, 560.506767351568, 381.939424848348, + 0, 0, 1); + + /* Set distortion coefficients */ + Mat D(1, 4, CV_64F); + theRNG().fill(D, RNG::UNIFORM, -1.e-5, 1.e-5); + + int pointsNumber = std::get<0>(GetParam()); + + /* Create two-channel points matrix */ + cv::Mat xy[2] = {}; + xy[0].create(pointsNumber, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, imageSize.width); // x + xy[1].create(pointsNumber, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, imageSize.height); // y + + cv::Mat points; + merge(xy, 2, points); + + /* Set fixed iteration number to check only c++ code, not algo convergence */ + TermCriteria termCriteria(TermCriteria::MAX_ITER, 10, 0); + + Mat undistortedPoints; + TEST_CYCLE() fisheye::undistortPoints(points, undistortedPoints, K, D, noArray(), noArray(), termCriteria); + + SANITY_CHECK_NOTHING(); +} + +}} // namespace diff --git a/modules/calib/src/fisheye.cpp b/modules/calib/src/fisheye.cpp index e850d74dfd..474917b413 100644 --- a/modules/calib/src/fisheye.cpp +++ b/modules/calib/src/fisheye.cpp @@ -320,7 +320,8 @@ void cv::fisheye::distortPoints(InputArray undistorted, OutputArray distorted, I ////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// cv::fisheye::undistortPoints -void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray R, InputArray P) +void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, + InputArray R, InputArray P, TermCriteria criteria) { CV_INSTRUMENT_REGION(); @@ -332,6 +333,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted CV_Assert(R.empty() || R.size() == Size(3, 3) || R.total() * R.channels() == 3); CV_Assert(D.total() == 4 && K.size() == Size(3, 3) && (K.depth() == CV_32F || K.depth() == CV_64F)); + CV_Assert(criteria.isValid()); + Vec2d f, c; if (K.depth() == CV_32F) { @@ -374,6 +377,15 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted size_t n = distorted.total(); int sdepth = distorted.depth(); + const bool isEps = criteria.type & TermCriteria::EPS; + + /* Define max count for solver iterations */ + int maxCount = std::numeric_limits::max(); + if (criteria.type & TermCriteria::MAX_ITER) { + maxCount = criteria.maxCount; + } + + for(size_t i = 0; i < n; i++ ) { Vec2d pi = sdepth == CV_32F ? (Vec2d)srcf[i] : srcd[i]; // image point @@ -391,13 +403,11 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted double scale = 0.0; - if (fabs(theta_d) > 1e-8) + if (!isEps || fabs(theta_d) > criteria.epsilon) { // compensate distortion iteratively - const double EPS = 1e-8; // or std::numeric_limits::epsilon(); - - for (int j = 0; j < 10; j++) + for (int j = 0; j < maxCount; j++) { double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2; double k0_theta2 = k[0] * theta2, k1_theta4 = k[1] * theta4, k2_theta6 = k[2] * theta6, k3_theta8 = k[3] * theta8; @@ -405,7 +415,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) / (1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8); theta = theta - theta_fix; - if (fabs(theta_fix) < EPS) + + if (isEps && (fabs(theta_fix) < criteria.epsilon)) { converged = true; break; @@ -424,7 +435,7 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted // so we can check whether theta has changed the sign during the optimization bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0)); - if (converged && !theta_flipped) + if ((converged || !isEps) && !theta_flipped) { Vec2d pu = pw * scale; //undistorted point diff --git a/modules/calib/test/test_fisheye.cpp b/modules/calib/test/test_fisheye.cpp index 310804d233..ad3b066a6a 100644 --- a/modules/calib/test/test_fisheye.cpp +++ b/modules/calib/test/test_fisheye.cpp @@ -101,6 +101,55 @@ TEST_F(fisheyeTest, projectPoints) EXPECT_MAT_NEAR(distorted0, distorted2, 1e-10); } +TEST_F(fisheyeTest, distortUndistortPoints) +{ + int width = imageSize.width; + int height = imageSize.height; + + /* Create test points */ + std::vector points0Vector; + cv::Mat principalPoints = (cv::Mat_(5, 2) << K(0, 2), K(1, 2), // (cx, cy) + /* Image corners */ + 0, 0, + 0, height, + width, 0, + width, height + ); + + /* Random points inside image */ + cv::Mat xy[2] = {}; + xy[0].create(100, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x + xy[1].create(100, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y + + cv::Mat randomPoints; + merge(xy, 2, randomPoints); + + cv::Mat points0; + cv::vconcat(principalPoints.reshape(2), randomPoints, points0); + + /* Test with random D set */ + for (size_t i = 0; i < 10; ++i) { + cv::Mat D(1, 4, CV_64F); + theRNG().fill(D, cv::RNG::UNIFORM, -0.00001, 0.00001); + + /* Distort -> Undistort */ + cv::Mat distortedPoints; + cv::fisheye::distortPoints(points0, distortedPoints, K, D); + cv::Mat undistortedPoints; + cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, D); + + EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8); + + /* Undistort -> Distort */ + cv::fisheye::undistortPoints(points0, undistortedPoints, K, D); + cv::fisheye::distortPoints(undistortedPoints, distortedPoints, K, D); + + EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8); + } +} + TEST_F(fisheyeTest, undistortImage) { cv::Matx33d theK = this->K; diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 70ea4f8c1f..f7807e37ec 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1739,6 +1739,16 @@ should be done separately if needed. */ CV_EXPORTS_W void transpose(InputArray src, OutputArray dst); +/** @brief Transpose for n-dimensional matrices. + * + * @note Input should be continuous single-channel matrix. + * @param src input array. + * @param order a permutation of [0,1,..,N-1] where N is the number of axes of src. + * The i’th axis of dst will correspond to the axis numbered order[i] of the input. + * @param dst output array of the same type as src. + */ +CV_EXPORTS_W void transposeND(InputArray src, const std::vector& order, OutputArray dst); + /** @brief Performs the matrix transformation of every array element. The function cv::transform performs the matrix transformation of every diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index 7a50390aed..4f7eb532b9 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -223,6 +223,53 @@ namespace nested { CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { return flag; } + +class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName +{ +public: + struct CV_EXPORTS_W_SIMPLE Params + { + CV_PROP_RW int int_value; + CV_PROP_RW float float_value; + + CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f) + { + int_value = int_param; + float_value = float_param; + } + }; + + explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + params_ = params; + } + + CV_WRAP int getIntParam() const + { + return params_.int_value; + } + + CV_WRAP float getFloatParam() const + { + return params_.float_value; + } + + CV_WRAP static std::string originalName() + { + return "OriginalClassName"; + } + + CV_WRAP static Ptr + create(const OriginalClassName::Params& params = OriginalClassName::Params()) + { + return makePtr(params); + } + +private: + OriginalClassName::Params params_; +}; + +typedef OriginalClassName::Params OriginalClassName_Params; } // namespace nested namespace fs { diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index a1fbb093a8..c035fdad60 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)msa_sum_##suffix(a.val); \ } -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32) OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) inline uint64 v_reduce_sum(const v_uint64x2& a) diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index e17972a3fc..28cf813379 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { +#if CV_NEON_AARCH64 + int32x4_t c = vmull_high_s16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_int16x8(vcombine_s16( vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16), - vshrn_n_s32( -#if CV_NEON_AARCH64 - vmull_high_s16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_s32(c, 16) )); } inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { +#if CV_NEON_AARCH64 + uint32x4_t c = vmull_high_u16(a.val, b.val); +#else // #if CV_NEON_AARCH64 + uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)); +#endif // #if CV_NEON_AARCH64 return v_uint16x8(vcombine_u16( vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16), - vshrn_n_u32( -#if CV_NEON_AARCH64 - vmull_high_u16(a.val, b.val) -#else // #if CV_NEON_AARCH64 - vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val)) -#endif // #if CV_NEON_AARCH64 - , 16) + vshrn_n_u32(c, 16) )); } @@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a) { float32x4_t a_ = a.val; int32x4_t result; +#if defined _MSC_VER + result = vcvtnq_s32_f32(a_); +#else __asm__ ("fcvtns %0.4s, %1.4s" : "=w"(result) : "w"(a_) : /* No clobbers */); +#endif return v_int32x4(result); } #else diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index fe6c077639..a592976827 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -230,6 +230,7 @@ inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src, size_t vl) //////////// Types //////////// +#ifndef __clang__ struct v_uint8x16 { typedef uchar lane_type; @@ -531,7 +532,358 @@ struct v_float64x2 double val[2]; }; #endif +#else +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + *pval = v; + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_u8m1(v, nlanes); + } + operator vuint8m1_t() const + { + return *pval; + } + uchar get0() const + { + return vmv_x(*pval); + } + inline v_uint8x16& operator=(const v_uint8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint8x16(const v_uint8x16& vec) { + *pval = *(vec.pval); + } + uchar val[16]; + vuint8m1_t* pval = (vuint8m1_t*)val; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + *pval = v; + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + *pval = vle8_v_i8m1(v, nlanes); + } + operator vint8m1_t() const + { + return *pval; + } + schar get0() const + { + return vmv_x(*pval); + } + inline v_int8x16& operator=(const v_int8x16& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int8x16(const v_int8x16& vec) { + *pval = *(vec.pval); + } + schar val[16]; + vint8m1_t* pval = (vint8m1_t*)val; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + *pval = v; + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_u16m1(v, nlanes); + } + operator vuint16m1_t() const + { + return *pval; + } + ushort get0() const + { + return vmv_x(*pval); + } + + inline v_uint16x8& operator=(const v_uint16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint16x8(const v_uint16x8& vec) { + *pval = *(vec.pval); + } + ushort val[8]; + vuint16m1_t* pval = (vuint16m1_t*)val; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + *pval = v; + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + *pval = vle16_v_i16m1(v, nlanes); + } + operator vint16m1_t() const + { + return *pval; + } + short get0() const + { + return vmv_x(*pval); + } + + inline v_int16x8& operator=(const v_int16x8& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int16x8(const v_int16x8& vec) { + *pval = *(vec.pval); + } + short val[8]; + vint16m1_t* pval = (vint16m1_t*)val; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + *pval = v; + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + *pval = vle32_v_u32m1(v, nlanes); + } + operator vuint32m1_t() const + { + return *pval; + } + unsigned get0() const + { + return vmv_x(*pval); + } + + inline v_uint32x4& operator=(const v_uint32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint32x4(const v_uint32x4& vec) { + *pval = *(vec.pval); + } + unsigned val[4]; + vuint32m1_t* pval = (vuint32m1_t*)val; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + *pval = v; + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + *pval = vle32_v_i32m1(v, nlanes); + } + operator vint32m1_t() const + { + return *pval; + } + int get0() const + { + return vmv_x(*pval); + } + + inline v_int32x4& operator=(const v_int32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int32x4(const v_int32x4& vec) { + *pval = *(vec.pval); + } + int val[4]; + vint32m1_t* pval = (vint32m1_t*)val; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + *pval = v; + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + *pval = vle32_v_f32m1(v, nlanes); + } + operator vfloat32m1_t() const + { + return *pval; + } + float get0() const + { + return vfmv_f(*pval); + } + inline v_float32x4& operator=(const v_float32x4& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float32x4(const v_float32x4& vec) { + *pval = *(vec.pval); + } + float val[4]; + vfloat32m1_t* pval = (vfloat32m1_t*)val; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + *pval = v; + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + *pval = vle64_v_u64m1(v, nlanes); + } + operator vuint64m1_t() const + { + return *pval; + } + uint64 get0() const + { + return vmv_x(*pval); + } + + inline v_uint64x2& operator=(const v_uint64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_uint64x2(const v_uint64x2& vec) { + *pval = *(vec.pval); + } + uint64 val[2]; + vuint64m1_t* pval = (vuint64m1_t*)val; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + *pval = v; + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + *pval = vle64_v_i64m1(v, nlanes); + } + operator vint64m1_t() const + { + return *pval; + } + int64 get0() const + { + return vmv_x(*pval); + } + + inline v_int64x2& operator=(const v_int64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_int64x2(const v_int64x2& vec) { + *pval = *(vec.pval); + } + int64 val[2]; + vint64m1_t* pval = (vint64m1_t*)val; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + *pval = v; + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + *pval = vle64_v_f64m1(v, nlanes); + } + operator vfloat64m1_t() const + { + return *pval; + } + double get0() const + { + return vfmv_f(*pval); + } + + inline v_float64x2& operator=(const v_float64x2& vec) { + *pval = *(vec.pval); + return *this; + } + inline v_float64x2(const v_float64x2& vec) { + *pval = *(vec.pval); + } + double val[2]; + vfloat64m1_t* pval = (vfloat64m1_t*)val; +}; +#endif // CV_SIMD128_64F +#endif // __clang__ //////////// Initial //////////// @@ -1819,6 +2171,7 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a) } #if CV_SIMD128_64F +#ifndef __clang__ inline v_float32x4 v_cvt_f32(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; @@ -1832,6 +2185,18 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); } +#else +inline v_float32x4 v_cvt_f32(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} +#endif inline v_float64x2 v_cvt_f64(const v_int32x4& a) { @@ -2351,6 +2716,7 @@ OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) //////////// SignMask //////////// +#ifndef __clang__ #define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, vl, shift) \ inline int v_signmask(const _Tpvec& a) \ { \ @@ -2381,6 +2747,36 @@ inline int v_signmask(const v_float64x2& a) { return v_signmask(v_reinterpret_as_u64(a)); } #endif +#else +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, width, vl) \ +inline int v_signmask(const _Tpvec& a) \ +{ \ + uint8_t ans[16] = {0};\ + vsm(ans, vmslt(a, 0, vl), vl);\ + return reinterpret_cast(ans)[0];\ +} + +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8x16, 8, 16) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16x8, 16, 8) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32x4, 32, 4) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64x2, 64, 2) + +inline int v_signmask(const v_uint8x16& a) +{ return v_signmask(v_reinterpret_as_s8(a)); } +inline int v_signmask(const v_uint16x8& a) +{ return v_signmask(v_reinterpret_as_s16(a)); } +inline int v_signmask(const v_uint32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_float32x4& a) +{ return v_signmask(v_reinterpret_as_s32(a)); } +inline int v_signmask(const v_uint64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#if CV_SIMD128_64F +inline int v_signmask(const v_float64x2& a) +{ return v_signmask(v_reinterpret_as_s64(a)); } +#endif + +#endif //////////// Scan forward //////////// @@ -2520,6 +2916,7 @@ inline v_int32x4 v_trunc(const v_float32x4& a) return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4)); } #if CV_SIMD128_64F +#ifndef __clang__ inline v_int32x4 v_round(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; @@ -2554,6 +2951,42 @@ inline v_int32x4 v_trunc(const v_float64x2& a) vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4)); } + +#else +inline v_int32x4 v_round(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} + +inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) +{ + vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a); + return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(dst, 1, b), 4)); +} + +inline v_int32x4 v_floor(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfsub_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_ceil(const v_float64x2& a) +{ + vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4); + dst = vset_v_f64m1_f64m2(dst, 0, a); + dst = vfadd_vf_f64m2(dst, 0.5, 2); + return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4)); +} + +inline v_int32x4 v_trunc(const v_float64x2& a) +{ + vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4)); +} +#endif #endif diff --git a/modules/core/include/opencv2/core/hal/msa_macros.h b/modules/core/include/opencv2/core/hal/msa_macros.h index bd6ddb127a..fad8c5adda 100644 --- a/modules/core/include/opencv2/core/hal/msa_macros.h +++ b/modules/core/include/opencv2/core/hal/msa_macros.h @@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v2i64 _c; \ _b = __builtin_msa_hadd_s_w(__a, __a); \ _c = __builtin_msa_hadd_s_d(_b, _b); \ - (int16_t)(_c[0] + _c[1]); \ + (int32_t)(_c[0] + _c[1]); \ }) @@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); ({ \ v2i64 _b; \ _b = __builtin_msa_hadd_s_d(__a, __a); \ - (int32_t)(_b[0] + _b[1]); \ + (int64_t)(_b[0] + _b[1]); \ }) /* uint8_t msa_sum_u8(v16u8 __a)*/ @@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v4i32 _c32; \ _b16 = __builtin_msa_hadd_s_h(__a, __a); \ _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \ - (int8_t)msa_sum_s32(_c32); \ + (int16_t)msa_sum_s32(_c32); \ }) /* float msa_sum_f32(v4f32 __a)*/ diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp index 9189a08a14..2fbe8410cf 100644 --- a/modules/core/include/opencv2/core/matx.hpp +++ b/modules/core/include/opencv2/core/matx.hpp @@ -382,6 +382,14 @@ public: Vec(const Vec<_Tp, cn>& v); static Vec all(_Tp alpha); + static Vec ones(); + static Vec randn(_Tp a, _Tp b); + static Vec randu(_Tp a, _Tp b); + static Vec zeros(); +#ifdef CV_CXX11 + static Vec diag(_Tp alpha) = delete; + static Vec eye() = delete; +#endif //! per-element multiplication Vec mul(const Vec<_Tp, cn>& v) const; @@ -1061,6 +1069,18 @@ Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha) return v; } +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::ones() +{ + return Vec::all(1); +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::zeros() +{ + return Vec::all(0); +} + template inline Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const { diff --git a/modules/core/include/opencv2/core/operations.hpp b/modules/core/include/opencv2/core/operations.hpp index b2ddf77daf..ad84b87db2 100644 --- a/modules/core/include/opencv2/core/operations.hpp +++ b/modules/core/include/opencv2/core/operations.hpp @@ -230,6 +230,22 @@ Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b) return M; } +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::randu(_Tp a, _Tp b) +{ + Vec<_Tp,cn> V; + cv::randu(V, Scalar(a), Scalar(b)); + return V; +} + +template inline +Vec<_Tp, cn> Vec<_Tp, cn>::randn(_Tp a, _Tp b) +{ + Vec<_Tp,cn> V; + cv::randn(V, Scalar(a), Scalar(b)); + return V; +} + template inline Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const { diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index 5cbc066784..79a1074d59 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -324,6 +324,7 @@ VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) #define VSX_IMPL_CONVERT(rt, rg, fnm) \ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } +#ifndef vec_permi #if __clang_major__ < 5 // implement vec_permi in a dirty way # define VSX_IMPL_CLANG_4_PERMI(Tvec) \ @@ -351,12 +352,14 @@ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); } // vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4 # define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1))) #endif // __clang_major__ < 5 +#endif // shift left double by word immediate #ifndef vec_sldw # define vec_sldw vec_xxsldwi #endif +#if __clang_major__ < 13 // Implement vec_rsqrt since clang only supports vec_rsqrte #ifndef vec_rsqrt VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a) @@ -380,6 +383,7 @@ VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b) ret[b & 1] = a; return ret; } +#endif // vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt #define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \ diff --git a/modules/core/misc/objc/gen_dict.json b/modules/core/misc/objc/gen_dict.json index a645df19f5..e3f28bf529 100644 --- a/modules/core/misc/objc/gen_dict.json +++ b/modules/core/misc/objc/gen_dict.json @@ -78,6 +78,26 @@ "(void)divide:(double)scale src2:(Mat*)src2 dst:(Mat*)dst dtype:(int)dtype" : { "src2" : {"name" : "src"} } } }, + "header_fix" : { + "Core": { + "pow" : { + "prolog" : "#pragma push_macro(\"pow\")\n#undef pow", + "epilog" : "#pragma pop_macro(\"pow\")" + }, + "sqrt" : { + "prolog" : "#pragma push_macro(\"sqrt\")\n#undef sqrt", + "epilog" : "#pragma pop_macro(\"sqrt\")" + }, + "exp" : { + "prolog" : "#pragma push_macro(\"exp\")\n#undef exp", + "epilog" : "#pragma pop_macro(\"exp\")" + }, + "log" : { + "prolog" : "#pragma push_macro(\"log\")\n#undef log", + "epilog" : "#pragma pop_macro(\"log\")" + } + } + }, "type_dict" : { "Algorithm": { "objc_type": "Algorithm*" diff --git a/modules/core/perf/perf_arithm.cpp b/modules/core/perf/perf_arithm.cpp index 70e2f49210..3ac9a24639 100644 --- a/modules/core/perf/perf_arithm.cpp +++ b/modules/core/perf/perf_arithm.cpp @@ -1,4 +1,5 @@ #include "perf_precomp.hpp" +#include namespace opencv_test { @@ -393,6 +394,29 @@ PERF_TEST_P_(BinaryOpTest, reciprocal) SANITY_CHECK_NOTHING(); } + +PERF_TEST_P_(BinaryOpTest, transposeND) +{ + Size sz = get<0>(GetParam()); + int type = get<1>(GetParam()); + cv::Mat a = Mat(sz, type).reshape(1); + + std::vector order(a.dims); + std::iota(order.begin(), order.end(), 0); + std::reverse(order.begin(), order.end()); + + std::vector new_sz(a.dims); + std::copy(a.size.p, a.size.p + a.dims, new_sz.begin()); + std::reverse(new_sz.begin(), new_sz.end()); + cv::Mat b = Mat(new_sz, type); + + declare.in(a,WARMUP_RNG).out(b); + + TEST_CYCLE() cv::transposeND(a, order, b); + + SANITY_CHECK_NOTHING(); +} + INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest, testing::Combine( testing::Values(szVGA, sz720p, sz1080p), diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp index 2dbc3e2763..7b423cc29f 100644 --- a/modules/core/src/directx.cpp +++ b/modules/core/src/directx.cpp @@ -1385,6 +1385,7 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst OpenCL_D3D11_NV* impl_nv = ctx.getUserContext().get(); if (impl_nv) { __convertFromD3D11Texture2DNV(pD3D11Texture2D,dst); + return; } #endif OpenCL_D3D11* impl = ctx.getUserContext().get(); diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index f968bec02f..05c6d3bd1f 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -270,6 +270,9 @@ void cartToPolar( InputArray src1, InputArray src2, { CV_INSTRUMENT_REGION(); + CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() && + src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj()); + CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(), ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees)) @@ -564,6 +567,9 @@ void polarToCart( InputArray src1, InputArray src2, { CV_INSTRUMENT_REGION(); + CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() && + src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj()); + int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type)); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 6a381c15a0..8111dc2230 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -664,6 +664,8 @@ void Mat::create(int d, const int* _sizes, int _type) if( data && (d == dims || (d == 1 && dims <= 2)) && _type == type() ) { + if ( dims == 1 && (d == 1 && _sizes[0] == size[0]) ) + return; if( d == 2 && rows == _sizes[0] && cols == _sizes[1] ) return; for( i = 0; i < d; i++ ) diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 727eaf7fee..05ecf450e1 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -4,6 +4,7 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" +#include "opencv2/core/detail/dispatch_helper.impl.hpp" namespace cv { @@ -282,6 +283,72 @@ void transpose( InputArray _src, OutputArray _dst ) } +void transposeND(InputArray src_, const std::vector& order, OutputArray dst_) +{ + Mat inp = src_.getMat(); + CV_Assert(inp.isContinuous()); + CV_CheckEQ(inp.channels(), 1, "Input array should be single-channel"); + CV_CheckEQ(order.size(), static_cast(inp.dims), "Number of dimensions shouldn't change"); + + auto order_ = order; + std::sort(order_.begin(), order_.end()); + for (size_t i = 0; i < order_.size(); ++i) + { + CV_CheckEQ(static_cast(order_[i]), i, "New order should be a valid permutation of the old one"); + } + + std::vector newShape(order.size()); + for (size_t i = 0; i < order.size(); ++i) + { + newShape[i] = inp.size[order[i]]; + } + + dst_.create(static_cast(newShape.size()), newShape.data(), inp.type()); + Mat out = dst_.getMat(); + CV_Assert(out.isContinuous()); + CV_Assert(inp.data != out.data); + + int continuous_idx = 0; + for (int i = static_cast(order.size()) - 1; i >= 0; --i) + { + if (order[i] != i) + { + continuous_idx = i + 1; + break; + } + } + + size_t continuous_size = continuous_idx == 0 ? out.total() : out.step1(continuous_idx - 1); + size_t outer_size = out.total() / continuous_size; + + std::vector steps(order.size()); + for (int i = 0; i < static_cast(steps.size()); ++i) + { + steps[i] = inp.step1(order[i]); + } + + auto* src = inp.ptr(); + auto* dst = out.ptr(); + + size_t src_offset = 0; + size_t es = out.elemSize(); + for (size_t i = 0; i < outer_size; ++i) + { + std::memcpy(dst, src + es * src_offset, es * continuous_size); + dst += es * continuous_size; + for (int j = continuous_idx - 1; j >= 0; --j) + { + src_offset += steps[j]; + if ((src_offset / steps[j]) % out.size[j] != 0) + { + break; + } + src_offset -= steps[j] * out.size[j]; + } + } +} + + #if CV_SIMD128 template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 6789c78e9d..09435b5850 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -817,7 +817,7 @@ char *FileStorage::Impl::gets(size_t maxCount) { int delta = (int) strlen(ptr); ofs += delta; maxCount -= delta; - if (ptr[delta - 1] == '\n' || maxCount == 0) + if (delta == 0 || ptr[delta - 1] == '\n' || maxCount == 0) break; if (delta == count) buffer.resize((size_t) (buffer.size() * 1.5)); diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 84f0039dc9..1741712a6e 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -615,6 +615,9 @@ struct HWFeatures #if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800) have[CV_CPU_NEON] = true; #endif + #if defined _M_ARM64 + have[CV_CPU_NEON] = true; + #endif #ifdef __riscv_vector have[CV_CPU_RISCVV] = true; #endif diff --git a/modules/core/src/va_wrapper.impl.hpp b/modules/core/src/va_wrapper.impl.hpp index 260d3ba49b..77faa984d0 100644 --- a/modules/core/src/va_wrapper.impl.hpp +++ b/modules/core/src/va_wrapper.impl.hpp @@ -15,18 +15,33 @@ typedef VAStatus (*FN_vaDestroyImage)(VADisplay dpy, VAImageID image); typedef VAStatus (*FN_vaMapBuffer)(VADisplay dpy, VABufferID buf_id, void **pbuf); typedef VAStatus (*FN_vaSyncSurface)(VADisplay dpy, VASurfaceID render_target); typedef VAStatus (*FN_vaUnmapBuffer)(VADisplay dpy, VABufferID buf_id); +typedef int (*FN_vaMaxNumImageFormats)(VADisplay dpy); +typedef VAStatus (*FN_vaQueryImageFormats)(VADisplay dpy, VAImageFormat *format_list, int *num_formats); +typedef VAStatus (*FN_vaCreateImage)(VADisplay dpy, VAImageFormat *format, int width, int height, VAImage *image); +typedef VAStatus (*FN_vaPutImage)(VADisplay dpy, VASurfaceID surface, VAImageID image, int src_x, int src_y, unsigned int src_width, unsigned int src_height, int dest_x, int dest_y, unsigned int dest_width, unsigned int dest_height); +typedef VAStatus (*FN_vaGetImage)(VADisplay dpy, VASurfaceID surface, int x, int y, unsigned int width, unsigned int height, VAImageID image); static FN_vaDeriveImage fn_vaDeriveImage = NULL; static FN_vaDestroyImage fn_vaDestroyImage = NULL; static FN_vaMapBuffer fn_vaMapBuffer = NULL; static FN_vaSyncSurface fn_vaSyncSurface = NULL; static FN_vaUnmapBuffer fn_vaUnmapBuffer = NULL; +static FN_vaMaxNumImageFormats fn_vaMaxNumImageFormats = NULL; +static FN_vaQueryImageFormats fn_vaQueryImageFormats = NULL; +static FN_vaCreateImage fn_vaCreateImage = NULL; +static FN_vaPutImage fn_vaPutImage = NULL; +static FN_vaGetImage fn_vaGetImage = NULL; #define vaDeriveImage fn_vaDeriveImage #define vaDestroyImage fn_vaDestroyImage #define vaMapBuffer fn_vaMapBuffer #define vaSyncSurface fn_vaSyncSurface #define vaUnmapBuffer fn_vaUnmapBuffer +#define vaMaxNumImageFormats fn_vaMaxNumImageFormats +#define vaQueryImageFormats fn_vaQueryImageFormats +#define vaCreateImage fn_vaCreateImage +#define vaPutImage fn_vaPutImage +#define vaGetImage fn_vaGetImage static std::shared_ptr loadLibVA() @@ -76,6 +91,11 @@ static void init_libva() VA_LOAD_SYMBOL(vaMapBuffer); VA_LOAD_SYMBOL(vaSyncSurface); VA_LOAD_SYMBOL(vaUnmapBuffer); + VA_LOAD_SYMBOL(vaMaxNumImageFormats); + VA_LOAD_SYMBOL(vaQueryImageFormats); + VA_LOAD_SYMBOL(vaCreateImage); + VA_LOAD_SYMBOL(vaPutImage); + VA_LOAD_SYMBOL(vaGetImage); initialized = true; } if (!library) diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 014a0cff0a..6d50b5a8f7 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" #include "ref_reduce_arg.impl.hpp" +#include namespace opencv_test { namespace { @@ -2128,6 +2129,79 @@ TEST(Core_minMaxIdx, regression_9207_1) } +class TransposeND : public testing::TestWithParam< tuple, perf::MatType> > +{ +public: + std::vector m_shape; + int m_type; + + void SetUp() + { + std::tie(m_shape, m_type) = GetParam(); + } +}; + + +TEST_P(TransposeND, basic) +{ + Mat inp(m_shape, m_type); + randu(inp, 0, 255); + + std::vector order(m_shape.size()); + std::iota(order.begin(), order.end(), 0); + auto transposer = [&order] (const std::vector& id) + { + std::vector ret(id.size()); + for (size_t i = 0; i < id.size(); ++i) + { + ret[i] = id[order[i]]; + } + return ret; + }; + auto advancer = [&inp] (std::vector& id) + { + for (int j = static_cast(id.size() - 1); j >= 0; --j) + { + ++id[j]; + if (id[j] != inp.size[j]) + { + break; + } + id[j] = 0; + } + }; + + do + { + Mat out; + cv::transposeND(inp, order, out); + std::vector id(order.size()); + for (size_t i = 0; i < inp.total(); ++i) + { + auto new_id = transposer(id); + switch (inp.type()) + { + case CV_8UC1: + ASSERT_EQ(inp.at(id.data()), out.at(new_id.data())); + break; + case CV_32FC1: + ASSERT_EQ(inp.at(id.data()), out.at(new_id.data())); + break; + default: + FAIL() << "Unsupported type: " << inp.type(); + } + advancer(id); + } + } while (std::next_permutation(order.begin(), order.end())); +} + + +INSTANTIATE_TEST_CASE_P(Arithm, TransposeND, testing::Combine( + testing::Values(std::vector{2, 3, 4}, std::vector{5, 10}), + testing::Values(perf::MatType(CV_8UC1), CV_32FC1) +)); + + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13; @@ -2546,5 +2620,36 @@ TEST(Core_Magnitude, regression_19506) } } +TEST(Core_CartPolar, inplace) +{ + RNG& rng = TS::ptr()->get_rng(); + cv::Mat1d A[2] = {cv::Mat1d(10, 10), cv::Mat1d(10, 10)}; + cv::Mat1d B[2], C[2]; + cv::UMat uA[2]; + + for(int i = 0; i < 2; ++i) + { + cvtest::randUni(rng, A[i], Scalar::all(-1000), Scalar::all(1000)); + A[i].copyTo(uA[i]); + } + + // Reverse + cv::cartToPolar(A[0], A[1], B[0], B[1], false); + cv::polarToCart(B[0], B[1], C[0], C[1], false); + EXPECT_MAT_NEAR(A[0], C[0], 2); + EXPECT_MAT_NEAR(A[1], C[1], 2); + + // Inplace + EXPECT_THROW(cv::polarToCart(B[0], B[1], B[0], B[1], false), cv::Exception); + EXPECT_THROW(cv::polarToCart(B[0], B[1], B[1], B[0], false), cv::Exception); + EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[0], A[1], false), cv::Exception); + EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[1], A[0], false), cv::Exception); + // Inplace OCL + EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception); + EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); + +} }} // namespace diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 4218cb9297..04426e58f7 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -3,6 +3,8 @@ // of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" +#include + namespace opencv_test { namespace { static SparseMat cvTsGetRandomSparseMat(int dims, const int* sz, int type, @@ -799,6 +801,25 @@ TEST(Core_InputOutput, filestorage_base64_basic_memory_JSON) test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", true, true); } +// issue #21851 +TEST(Core_InputOutput, filestorage_heap_overflow) +{ + const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + CV_Assert(test_info); + + std::string name = std::string(test_info->test_case_name()) + "--" + test_info->name(); + const char data[] = {0x00, 0x2f, 0x4a, 0x4a, 0x50, 0x4a, 0x4a }; + + std::ofstream file; + file.open(name, std::ios_base::binary); + assert(file.is_open()); + + file.write(data, sizeof(data)); + file.close(); + + // This just shouldn't segfault, otherwise it's fine + EXPECT_ANY_THROW(FileStorage(name, FileStorage::READ)); +} TEST(Core_InputOutput, filestorage_base64_valid_call) { diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 0c54b55ac6..95184fd11c 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -2370,6 +2370,18 @@ TEST(Mat, ptrVecni_20044) EXPECT_EQ(int(6), *(ci)); } + +TEST(Mat, VecMatx_4650) +{ + // Makes sure the following compiles. + cv::Vec3b a; + a = cv::Vec3b::ones(); + a = cv::Vec3b::zeros(); + a = cv::Vec3b::randn(0, 10); + a = cv::Vec3b::randu(0, 10); +} + + TEST(Mat, reverse_iterator_19967) { // empty iterator (#16855) @@ -2448,4 +2460,16 @@ TEST(Mat, reverse_iterator_19967) } +TEST(Mat, Recreate1DMatWithSameMeta) +{ + std::vector dims = {100}; + auto depth = CV_8U; + cv::Mat m(dims, depth); + + // By default m has dims: [1, 100] + m.dims = 1; + + EXPECT_NO_THROW(m.create(dims, depth)); +} + }} // namespace diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index a9540f1088..e0773d5214 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -23,6 +23,10 @@ if(WITH_WEBNN AND HAVE_WEBNN) add_definitions(-DHAVE_WEBNN=1) endif() +if(HAVE_TIMVX) + add_definitions(-DHAVE_TIMVX=1) +endif() + ocv_option(OPENCV_DNN_CUDA "Build with CUDA support" HAVE_CUDA AND HAVE_CUBLAS @@ -146,6 +150,11 @@ if(HAVE_TENGINE) list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive) endif() +if(HAVE_TIMVX) + list(APPEND include_dirs ${TIMVX_INCLUDE_DIR}) + list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive) +endif() + set(webnn_srcs "") if(NOT EMSCRIPTEN) if(HAVE_WEBNN) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 44b16f7800..dae8701970 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -262,7 +262,7 @@ CV__DNN_INLINE_NS_BEGIN { public: int input_zp, output_zp; - float output_sc; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; @@ -322,9 +322,24 @@ CV__DNN_INLINE_NS_BEGIN { public: int input_zp, output_zp; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; + class CV_EXPORTS ReduceLayer : public Layer + { + public: + int reduceType; + std::vector reduceDims; + static Ptr create(const LayerParams& params); + }; + + class CV_EXPORTS ReduceLayerInt8 : public ReduceLayer + { + public: + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS SoftmaxLayer : public Layer { public: @@ -351,7 +366,8 @@ CV__DNN_INLINE_NS_BEGIN class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer { public: - int output_zp; + int input_zp, output_zp; + float input_sc, output_sc; static Ptr create(const LayerParams& params); }; @@ -778,6 +794,26 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS SignLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ShrinkLayer : public ActivationLayer + { + public: + float bias; + float lambd; + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS ReciprocalLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + /* Layers used in semantic segmentation */ class CV_EXPORTS CropLayer : public Layer diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 97033a313e..8bca6c538b 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -75,6 +75,7 @@ CV__DNN_INLINE_NS_BEGIN DNN_BACKEND_VKCOM, DNN_BACKEND_CUDA, DNN_BACKEND_WEBNN, + DNN_BACKEND_TIMVX, #ifdef __OPENCV_BUILD DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType() @@ -95,7 +96,8 @@ CV__DNN_INLINE_NS_BEGIN DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin. DNN_TARGET_CUDA, DNN_TARGET_CUDA_FP16, - DNN_TARGET_HDDL + DNN_TARGET_HDDL, + DNN_TARGET_NPU, }; CV_EXPORTS std::vector< std::pair > getAvailableBackends(); @@ -321,6 +323,19 @@ CV__DNN_INLINE_NS_BEGIN const std::vector>& outputs ); + /** + * @brief Returns a TimVX backend node + * + * @param timVxInfo void pointer to CSLContext object + * @param inputsWrapper layer inputs + * @param outputsWrapper layer outputs + * @param isLast if the node is the last one of the TimVX Graph. + */ + virtual Ptr initTimVX(void* timVxInfo, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast); + /** * @brief Automatic Halide scheduling based on layer hyper-parameters. * @param[in] node Backend node with Halide functions. @@ -389,7 +404,7 @@ CV__DNN_INLINE_NS_BEGIN /** - * @brief "Deattaches" all the layers, attached to particular layer. + * @brief "Detaches" all the layers, attached to particular layer. */ virtual void unsetAttached(); @@ -1310,6 +1325,9 @@ CV__DNN_INLINE_NS_BEGIN class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model { public: + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) + ClassificationModel(); + /** * @brief Create classification model from network represented in one of the supported formats. * An order of @p model and @p config arguments does not matter. @@ -1324,6 +1342,24 @@ CV__DNN_INLINE_NS_BEGIN */ CV_WRAP ClassificationModel(const Net& network); + /** + * @brief Set enable/disable softmax post processing option. + * + * If this option is true, softmax is applied after forward inference within the classify() function + * to convert the confidences range to [0.0-1.0]. + * This function allows you to toggle this behavior. + * Please turn true when not contain softmax layer in model. + * @param[in] enable Set enable softmax post processing within the classify() function. + */ + CV_WRAP ClassificationModel& setEnableSoftmaxPostProcessing(bool enable); + + /** + * @brief Get enable/disable softmax post processing option. + * + * This option defaults to false, softmax post processing is not applied within the classify() function. + */ + CV_WRAP bool getEnableSoftmaxPostProcessing() const; + /** @brief Given the @p input frame, create input blob, run net and return top-1 prediction. * @param[in] frame The input image. */ @@ -1558,7 +1594,7 @@ public: * - top-right * - bottom-right * - * Use cv::getPerspectiveTransform function to retrive image region without perspective transformations. + * Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations. * * @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output. * diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index f5dafcea7f..e12457a164 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -248,6 +248,21 @@ void selu(const Stream& stream, Span output, View input, T alpha, T gamma) generic_op>(stream, output, input, {alpha, gamma}); } +template +void sign(const Stream& stream, Span output, View input) { + generic_op>(stream, output, input); +} + +template +void shrink(const Stream& stream, Span output, View input, T bias, T lambd) { + generic_op>(stream, output, input, {bias, lambd}); +} + +template +void reciprocal(const Stream& stream, Span output, View input) { + generic_op>(stream, output, input); +} + template void thresholdedrelu(const Stream& stream, Span output, View input, T alpha) { generic_op>(stream, output, input, {alpha}); @@ -312,6 +327,9 @@ template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __ template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void sign<__half>(const Stream&, Span<__half>, View<__half>); +template void shrink<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); +template void reciprocal<__half>(const Stream&, Span<__half>, View<__half>); #endif @@ -351,6 +369,9 @@ template void selu(const Stream&, Span, View, float, float) template void thresholdedrelu(const Stream&, Span, View, float); template void power(const Stream&, Span, View, float, float, float); template void exp(const Stream&, Span, View, float, float); +template void sign(const Stream&, Span, View); +template void shrink(const Stream&, Span, View, float, float); +template void reciprocal(const Stream&, Span, View); template static void launch_vectorized_axiswise_relu(const Stream& stream, Span output, View input, std::size_t inner_size, View slope) { diff --git a/modules/dnn/src/cuda/concat.cu b/modules/dnn/src/cuda/concat.cu index ac1be75682..5250b59518 100644 --- a/modules/dnn/src/cuda/concat.cu +++ b/modules/dnn/src/cuda/concat.cu @@ -100,7 +100,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { CV_Assert(output.rank() == input.rank()); CV_Assert(output_axis_offset < output.get_axis_size(axis)); - /* if axes preceeding the concat axis are all singleton, the concat blocks are contiguous + /* if axes preceding the concat axis are all singleton, the concat blocks are contiguous * in the output and we can copy each block directly */ if (output.size_range(0, axis) == 1) diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 640c7c8ad6..83a949f8e7 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -726,6 +726,52 @@ struct DivFunctor { CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; } }; +template +struct SignFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() {} + }; + + CUDA4DNN_DEVICE SignFunctor() { } + CUDA4DNN_DEVICE SignFunctor(const Params& params) { } + + CUDA4DNN_DEVICE T operator()(T value) { + return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0)); + } +}; + +template +struct ShrinkFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : bias(0), lambd(0.5) { } + CUDA4DNN_HOST_DEVICE Params(T bias_, T lambd_) : bias(bias_), lambd(lambd_) { } + T bias, lambd; + }; + + CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { } + CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { } + + CUDA4DNN_DEVICE T operator()(T value) { + return value > lambd ? value - bias : (value < -lambd ? value + bias : T(0)); + } + + T bias, lambd; +}; + +template +struct ReciprocalFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() {} + }; + + CUDA4DNN_DEVICE ReciprocalFunctor() { } + CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { } + + CUDA4DNN_DEVICE T operator()(T value) { + return T(1.f)/value; + } +}; + }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ diff --git a/modules/dnn/src/cuda/kernel_dispatcher.hpp b/modules/dnn/src/cuda/kernel_dispatcher.hpp index b0fc658850..0f3e7c4fc4 100644 --- a/modules/dnn/src/cuda/kernel_dispatcher.hpp +++ b/modules/dnn/src/cuda/kernel_dispatcher.hpp @@ -33,7 +33,7 @@ * template * void launch_some_kernel(...); * - * // creates the dispatcher named "some_dispatcher" which invokves the correct instantiation of "launch_some_kernel" + * // creates the dispatcher named "some_dispatcher" which invokes the correct instantiation of "launch_some_kernel" * GENERATE_KERNEL_DISPATCHER(some_dispatcher, launch_some_kernel); * * // internal API function diff --git a/modules/dnn/src/cuda/permute.cu b/modules/dnn/src/cuda/permute.cu index 082c1bf75e..35c95a6737 100644 --- a/modules/dnn/src/cuda/permute.cu +++ b/modules/dnn/src/cuda/permute.cu @@ -72,7 +72,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { __syncthreads(); /* We interchange `threadIdx.x` and `threadIdx.y` so that consecutive output indices map to - * consecutive threads. This would allow writes across threds in a warp to be coalesced. + * consecutive threads. This would allow writes across threads in a warp to be coalesced. */ const index_type out_x = blockIdx.y * TILE_SIZE + threadIdx.x; const index_type out_y_begin = blockIdx.x * TILE_SIZE + threadIdx.y; @@ -156,7 +156,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { * tensor indices be [o1, o2, ...]. The permutation operation essentially copies items * from the input tensor to new locations in the output tensor as dictated by the indices. * - * If the size of the nth axis (say i2) of the input is one the input and output indicies for + * If the size of the nth axis (say i2) of the input is one the input and output indices for * all the elements will be of the form be [i1, 0, ...] and [..., 0, ...] respectively. * The index does not contribute to the element's address calculation and hence would give * identical result if it weren't there. diff --git a/modules/dnn/src/cuda/slice.cu b/modules/dnn/src/cuda/slice.cu index 37b718cd63..461e87e549 100644 --- a/modules/dnn/src/cuda/slice.cu +++ b/modules/dnn/src/cuda/slice.cu @@ -159,7 +159,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { /* We can do a copy if the reduced rank is two and only the first axis is sliced. * The general requirement is that only one axis is sliced and all the axes that - * preceed the sliced axis are singleton. However, the reductions above will remove + * precede the sliced axis are singleton. However, the reductions above will remove * all the leading singleton axes and merge the trailing unsliced axes into one, or * zero if there are no trailing unsliced axes. The latter is handled separately. */ diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp index 2370492ad5..9bd8fcfe3b 100644 --- a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp +++ b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp @@ -287,6 +287,51 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu cudnnTensorDescriptor_t descriptor; }; + /** An array of number fully packed tensor descriptors + * + * @tparam T type of elements in the tensor + */ + template + class TensorDescriptorsArray + { + public: + TensorDescriptorsArray() noexcept = default; + TensorDescriptorsArray(const TensorDescriptorsArray&) = delete; + TensorDescriptorsArray(TensorDescriptorsArray&& other) noexcept + : descriptors{std::move(other.descriptors)} {} + + TensorDescriptorsArray(int seqLength, std::array dims) + { + for (int i = 0; i < seqLength; ++i) + { + descriptors.emplace_back(dims); + } + } + + ~TensorDescriptorsArray() noexcept = default; + + TensorDescriptorsArray& operator=(const TensorDescriptorsArray&) = delete; + TensorDescriptorsArray& operator=(TensorDescriptorsArray&& other) noexcept + { + descriptors = std::move(other.descriptors); + return *this; + }; + + std::vector get() const noexcept + { + std::vector descPtrs; + descPtrs.reserve(descriptors.size()); + for (auto& desc : descriptors) + { + descPtrs.push_back(desc.get()); + } + return descPtrs; + } + + private: + std::vector> descriptors; + }; + }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */ #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_HPP */ diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp new file mode 100644 index 0000000000..7ba6acdf17 --- /dev/null +++ b/modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp @@ -0,0 +1,195 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP +#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP + +#include "cudnn.hpp" +#include + + +namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn { + +/** + */ +class DropoutDescriptor +{ +public: + DropoutDescriptor() noexcept = default; + DropoutDescriptor(const DropoutDescriptor &) = delete; + DropoutDescriptor(DropoutDescriptor &&other) noexcept : descriptor{other.descriptor} + { + states = std::move(other.states); + other.descriptor = nullptr; + } + + /** + */ + DropoutDescriptor(const Handle &handle, float dropout) + { + CUDA4DNN_CHECK_CUDNN(cudnnCreateDropoutDescriptor(&descriptor)); + + // we need additional memory for dropout descriptor + size_t stateSize; + CUDA4DNN_CHECK_CUDNN(cudnnDropoutGetStatesSize(handle.get(), &stateSize)); + states.reset(stateSize); + + try + { + auto seed = 1234ull; // Pick a seed. + CUDA4DNN_CHECK_CUDNN(cudnnSetDropoutDescriptor(descriptor, handle.get(), dropout, + states.get().get(), stateSize, seed)); + } + catch (...) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor)); + throw; + } + } + + ~DropoutDescriptor() noexcept + { + if (descriptor) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor)); + } + } + + DropoutDescriptor &operator=(const DropoutDescriptor &) = delete; + DropoutDescriptor &operator=(DropoutDescriptor &&other) noexcept + { + descriptor = other.descriptor; + states = std::move(other.states); + other.descriptor = nullptr; + return *this; + }; + + cudnnDropoutDescriptor_t get() const noexcept { return descriptor; } + +private: + cudnnDropoutDescriptor_t descriptor{nullptr}; + + using value_type = typename ManagedPtr::element_type; + ManagedPtr states; +}; + +/** + */ +template +class RNNDescriptor +{ +public: + enum class RNNMode + { + RNN_RELU, + RNN_TANH, + LSTM, + GRU + }; + + RNNDescriptor() noexcept = default; + RNNDescriptor(const RNNDescriptor &) = delete; + RNNDescriptor(RNNDescriptor &&other) noexcept : descriptor{other.descriptor} + { + other.descriptor = nullptr; + } + + /** + */ + RNNDescriptor(const Handle &handle, RNNMode mode, int hidden_size, int num_layers, + bool bidirectional, const DropoutDescriptor &dropoutDesc) + { + CUDA4DNN_CHECK_CUDNN(cudnnCreateRNNDescriptor(&descriptor)); + const auto rnn_mode = [mode] { + switch (mode) + { + case RNNMode::RNN_RELU: + return CUDNN_RNN_RELU; + case RNNMode::RNN_TANH: + return CUDNN_RNN_TANH; + case RNNMode::LSTM: + return CUDNN_LSTM; + case RNNMode::GRU: + return CUDNN_GRU; + default: + return CUDNN_LSTM; + } + }(); + + try + { + CUDA4DNN_CHECK_CUDNN(cudnnSetRNNDescriptor_v6( + handle.get(), descriptor, hidden_size, num_layers, dropoutDesc.get(), + CUDNN_LINEAR_INPUT, bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, + rnn_mode, + algo, //CUDNN_RNN_ALGO_STANDARD, + detail::get_data_type())); + } + catch (...) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor)); + throw; + } + } + + ~RNNDescriptor() noexcept + { + if (descriptor) + { + CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor)); + } + } + + RNNDescriptor &operator=(const RNNDescriptor &) = delete; + RNNDescriptor &operator=(RNNDescriptor &&other) noexcept + { + descriptor = other.descriptor; + other.descriptor = nullptr; + return *this; + }; + + cudnnRNNDescriptor_t get() const noexcept { return descriptor; } + +private: + cudnnRNNDescriptor_t descriptor{nullptr}; + cudnnRNNMode_t mode{CUDNN_LSTM}; + // support only one algo for a while + cudnnRNNAlgo_t algo{CUDNN_RNN_ALGO_STANDARD}; +}; + +template +size_t getRNNWorkspaceSize(const Handle &handle, const RNNDescriptor &rnnDesc, + const int seqLength, const TensorDescriptorsArray &inputDesc) +{ + size_t workSize; + CUDA4DNN_CHECK_CUDNN(cudnnGetRNNWorkspaceSize(handle.get(), rnnDesc.get(), seqLength, + inputDesc.get().data(), &workSize)); + return workSize; +} + +template +void LSTMForward(const Handle &handle, const RNNDescriptor &rnnDesc, + const FilterDescriptor &filterDesc, DevicePtr filterPtr, + const TensorDescriptorsArray &inputDesc, DevicePtr inputPtr, + const TensorDescriptor &initialHDesc, DevicePtr initialH, + const TensorDescriptor &initialCDesc, DevicePtr initialC, + const int seqLength, const TensorDescriptorsArray &outputDesc, + DevicePtr yOutputPtr, DevicePtr ycOutputPtr, WorkspaceInstance workspace) +{ + CV_Assert(handle); + + CUDA4DNN_CHECK_CUDNN(cudnnRNNForwardInference(handle.get(), rnnDesc.get(), seqLength, + inputDesc.get().data(), inputPtr.get(), // input sequence + initialHDesc.get(), initialH.get(), + initialCDesc.get(), initialC.get(), // hidden + filterDesc.get(), filterPtr.get(), // weights + outputDesc.get().data(), yOutputPtr.get(), // output + nullptr, nullptr, + initialCDesc.get(), ycOutputPtr.get(), + static_cast(workspace.get()), workspace.size_in_bytes())); +} + +}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */ + +#endif //OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP \ No newline at end of file diff --git a/modules/dnn/src/cuda4dnn/csl/memory.hpp b/modules/dnn/src/cuda4dnn/csl/memory.hpp index 40918cd4b3..683ed62059 100644 --- a/modules/dnn/src/cuda4dnn/csl/memory.hpp +++ b/modules/dnn/src/cuda4dnn/csl/memory.hpp @@ -68,7 +68,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { } } }); - /* std::shared_ptr::reset invokves the deleter if an exception occurs; hence, we don't + /* std::shared_ptr::reset invokes the deleter if an exception occurs; hence, we don't * need to have a try-catch block to free the allocated device memory */ diff --git a/modules/dnn/src/cuda4dnn/csl/pointer.hpp b/modules/dnn/src/cuda4dnn/csl/pointer.hpp index 45bf94bf0a..4d7a232093 100644 --- a/modules/dnn/src/cuda4dnn/csl/pointer.hpp +++ b/modules/dnn/src/cuda4dnn/csl/pointer.hpp @@ -147,7 +147,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { /* host const void pointer to const void device pointer */ CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { } - /* allow any device pointer to be implicitly convereted to void device pointer */ + /* allow any device pointer to be implicitly converted to void device pointer */ template CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr ptr_) noexcept : ptr{ ptr_.get() } { } @@ -199,7 +199,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { /* host pointer to device pointer */ CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { } - /* allow any device pointer to mutable memory to be implicitly convereted to void device pointer */ + /* allow any device pointer to mutable memory to be implicitly converted to void device pointer */ template ::value, bool>::type = false> CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr ptr_) noexcept : ptr { ptr_.get() } { } diff --git a/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp b/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp index 4ee0e8ab77..27f8306bf3 100644 --- a/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp +++ b/modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp @@ -18,6 +18,7 @@ #include "cudnn/softmax.hpp" #include "cudnn/transform.hpp" #include "cudnn/transpose_convolution.hpp" +#include "cudnn/recurrent.hpp" #include @@ -472,6 +473,90 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { TensorTransformDescriptor transDesc; }; + template + class LSTM + { + using TensorDescriptor = cudnn::TensorDescriptor; + using DropoutDescriptor = cudnn::DropoutDescriptor; + using RNNDescriptor = cudnn::RNNDescriptor; + using FilterDescriptor = cudnn::FilterDescriptor; + using TensorDescriptorsArray = cudnn::TensorDescriptorsArray; + + public: + using RNNMode = typename RNNDescriptor::RNNMode; + + struct params_type + { + std::vector weights_shape; + + int seqLength; + int numLayers; + int hiddenSize; + int inputSize; + int miniBatch; + bool bidirectional; + + float dropout; + RNNMode type; + }; + + LSTM() = default; + LSTM(const LSTM&) = delete; + LSTM(LSTM&&) = default; + LSTM(cudnn::Handle handle, const params_type& params) + : cudnnHandle(std::move(handle)), seqLength{params.seqLength}, + inputDesc(seqLength, {params.miniBatch, params.inputSize, 1}), + outputDesc(seqLength, + {params.miniBatch, + params.bidirectional ? params.hiddenSize * 2 : params.hiddenSize, + 1}) + { + dropoutDesc = DropoutDescriptor(cudnnHandle, params.dropout); + filterDesc = FilterDescriptor(params.weights_shape); + rnnDesc = RNNDescriptor(cudnnHandle, params.type, params.hiddenSize, + params.numLayers, params.bidirectional, dropoutDesc); + + int num_direction = params.bidirectional ? 2 : 1; + h0TensorDesc = TensorDescriptor( + {num_direction, params.miniBatch, params.hiddenSize}); + c0TensorDesc = TensorDescriptor( + {num_direction, params.miniBatch, params.hiddenSize}); + + // Get amount of work space required to execute the RNN described by rnnDesc + // with input dimensions defined by inputDesc + csl::WorkspaceBuilder builder; + builder.require(cudnn::getRNNWorkspaceSize(cudnnHandle, rnnDesc, seqLength, inputDesc)); + scratch_mem_in_bytes = builder.required_workspace_size(); + } + + LSTM& operator=(const LSTM&) = delete; + LSTM& operator=(LSTM&&) = default; + + void inference(TensorView input, TensorSpan y_output, TensorSpan yc_output, TensorView filters, + TensorView h0, TensorView c0, WorkspaceInstance workspace) + { + cudnn::LSTMForward(cudnnHandle, rnnDesc, filterDesc, filters.get(), inputDesc, + input.get(), h0TensorDesc, h0.get(), c0TensorDesc, c0.get(), + seqLength, outputDesc, y_output.get(), yc_output.get(), workspace); + } + + std::size_t get_workspace_memory_in_bytes() const noexcept { return scratch_mem_in_bytes; } + + private: + cudnn::Handle cudnnHandle; + std::size_t scratch_mem_in_bytes{0}; + int seqLength; + + RNNDescriptor rnnDesc; + DropoutDescriptor dropoutDesc; + + FilterDescriptor filterDesc; + TensorDescriptor h0TensorDesc, c0TensorDesc; + + TensorDescriptorsArray inputDesc; + TensorDescriptorsArray outputDesc; + }; + }}}} /* namespace cv::dnn::cuda4dnn::csl */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_OPS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index ef1f6da3e6..6958b93d5e 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -123,6 +123,14 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void exp(const csl::Stream& stream, csl::Span output, csl::View input, T normScale, T normShift); + template + void sign(const csl::Stream& stream, csl::Span output, csl::View input); + + template + void shrink(const csl::Stream& stream, csl::Span output, csl::View input, T bias, T lambd); + + template + void reciprocal(const csl::Stream& stream, csl::Span output, csl::View input); }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index 39ebf513a7..564202e8c0 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -584,6 +584,52 @@ namespace cv { namespace dnn { namespace cuda4dnn { const T normScale, normShift; }; + template + class ShrinkOp final : public BaseOp { + public: + ShrinkOp(csl::Stream stream_, T bias_, T lambd_) + : stream(std::move(stream_)), bias{ bias_ }, lambd{ lambd_ } { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::shrink(stream, output, input, bias, lambd); + } + + private: + csl::Stream stream; + const T bias, lambd; + }; + + template + class SignOp final : public BaseOp { + public: + SignOp(csl::Stream stream_) + : stream(std::move(stream_)) { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::sign(stream, output, input); + } + + private: + csl::Stream stream; + }; + + template + class ReciprocalOp final : public BaseOp { + public: + ReciprocalOp(csl::Stream stream_) + : stream(std::move(stream_)) { } + + void calculate(csl::TensorSpan output, csl::TensorView input) const + { + kernels::reciprocal(stream, output, input); + } + + private: + csl::Stream stream; + }; + }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp b/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp new file mode 100644 index 0000000000..5cba788008 --- /dev/null +++ b/modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp @@ -0,0 +1,97 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP +#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP + +#include "../../op_cuda.hpp" + +#include "../csl/cudnn.hpp" +#include "../csl/tensor_ops.hpp" +#include "../csl/cudnn/recurrent.hpp" + +namespace cv { namespace dnn { namespace cuda4dnn { + +struct RNNConfiguration +{ + int seqLength; + int numLayers; + int hiddenSize; + int inputSize; + int miniBatch; + bool bidirectional; +}; + +template +class LSTMOp final : public CUDABackendNode +{ +public: + using wrapper_type = GetCUDABackendWrapperType; + + LSTMOp(csl::Stream stream_, csl::cudnn::Handle handle, const Mat& filters, const Mat& h0, + const Mat& c0, const RNNConfiguration& config) + : stream(std::move(stream_)) + { + typename csl::LSTM::params_type params{ + {filters.total(), 1, 1}, // reshape + config.seqLength, + config.numLayers, + config.hiddenSize, + config.inputSize, + config.miniBatch, + config.bidirectional, + 0.0, /* dropout */ + csl::cudnn::RNNDescriptor::RNNMode::LSTM + }; + + lstm = csl::LSTM(handle, params); + auto correct_shape_filters = filters.reshape(1, {static_cast(filters.total()), 1, 1}); + filtersTensor = csl::makeTensorHeader(correct_shape_filters); + csl::copyMatToTensor(correct_shape_filters, filtersTensor, stream); + + h0Tensor = csl::makeTensorHeader(h0); + csl::copyMatToTensor(h0, h0Tensor, stream); + + c0Tensor = csl::makeTensorHeader(c0); + csl::copyMatToTensor(c0, c0Tensor, stream); + + csl::WorkspaceBuilder builder; + builder.require(lstm.get_workspace_memory_in_bytes()); + } + + void forward(const std::vector>& inputs, + const std::vector>& outputs, + csl::Workspace& workspace) override + { + CV_Assert(inputs.size() == 1 && !outputs.empty()); + + auto input_wrapper = inputs[0].dynamicCast(); + auto input = input_wrapper->getView(); + + auto y_output_wrapper = outputs[0].dynamicCast(); + auto y_output = y_output_wrapper->getSpan(); + + Ptr yc_output_wrapper = outputs.size() == 2 ? outputs[1].dynamicCast() : Ptr(); + csl::TensorSpan yc_output = yc_output_wrapper.empty() ? csl::TensorSpan() : yc_output_wrapper->getSpan(); + + csl::WorkspaceAllocator allocator(workspace); + lstm.inference(input, y_output, yc_output, filtersTensor, h0Tensor, c0Tensor, allocator.get_instance()); + } + + std::size_t get_workspace_memory_in_bytes() const noexcept override + { + return lstm.get_workspace_memory_in_bytes(); + } + +private: + csl::LSTM lstm; + csl::Stream stream; + csl::Tensor filtersTensor; + csl::Tensor h0Tensor; + csl::Tensor c0Tensor; +}; + +}}} /* namespace cv::dnn::cuda4dnn */ + +#endif //OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_RECURRENT_CELLS_HPP \ No newline at end of file diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index 99715df829..520f3c94be 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -376,7 +376,7 @@ namespace cv { int begin[] = {0, split_size * group_id, 0, 0}; cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin, 4); - int end[] = {-1, begin[1] + split_size, -1, -1}; + int end[] = {INT_MAX, begin[1] + split_size, INT_MAX, INT_MAX}; cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end, 4); darknet::LayerParameter lp; @@ -791,7 +791,7 @@ namespace cv { if (layers_vec.size() > 1) { // layer ids in layers_vec - inputs of Slice layers - // after adding offset to layers_vec: layer ids - ouputs of Slice layers + // after adding offset to layers_vec: layer ids - outputs of Slice layers for (size_t k = 0; k < layers_vec.size(); ++k) layers_vec[k] += layers_vec.size(); diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 954ada50f2..8c397b14f2 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1,5849 +1,10 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #include "precomp.hpp" -#include "op_halide.hpp" -#include "op_inf_engine.hpp" -#include "ie_ngraph.hpp" -#include "op_vkcom.hpp" -#include "op_cuda.hpp" -#include "op_webnn.hpp" -#ifdef HAVE_CUDA -#include "cuda4dnn/init.hpp" -#include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers -#endif - -#include "halide_scheduler.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace cv { -namespace dnn { -CV__DNN_INLINE_NS_BEGIN - -static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0); - -// this option is useful to run valgrind memory errors detection -static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false); - -#ifdef HAVE_OPENCL -static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false); -#endif - -static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", -#ifdef HAVE_INF_ENGINE - (size_t)DNN_BACKEND_INFERENCE_ENGINE -#else - (size_t)DNN_BACKEND_OPENCV -#endif -); - -// Additional checks (slowdowns execution!) -static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); -static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); -static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); - -using std::vector; -using std::map; -using std::make_pair; -using std::set; -using std::string; - -//================================================================================================== - -class BackendRegistry -{ -public: - typedef std::vector< std::pair > BackendsList; - const BackendsList & getBackends() const { return backends; } - static BackendRegistry & getRegistry() - { - static BackendRegistry impl; - return impl; - } - -#ifdef HAVE_INF_ENGINE - static inline bool checkIETarget(Target target) - { -#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R3) - // Lightweight detection - const std::vector devices = getCore("").GetAvailableDevices(); - for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) - { - if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD) - return true; - if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL) - return true; - else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA) - return true; - else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU) - return true; - else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) - return true; - } - return false; -#else - cv::dnn::Net net; - cv::dnn::LayerParams lp; - lp.set("kernel_size", 1); - lp.set("num_output", 1); - lp.set("bias_term", false); - lp.type = "Convolution"; - lp.name = "testLayer"; - lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1))); - net.addLayerToPrev(lp.name, lp.type, lp); - net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); - net.setPreferableTarget(target); - static int inpDims[] = {1, 2, 3, 4}; - net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0))); - try - { - net.forward(); - } - catch(const std::exception& e) - { - CV_LOG_INFO(NULL, "checkIETarget(" << (int)target << ") has failed with message: " << e.what()); - return false; - } - return true; -#endif - } -#endif - -private: - BackendRegistry() - { -#ifdef HAVE_HALIDE - backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU)); -# ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL()) - backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL)); -# endif -#endif // HAVE_HALIDE - -#ifdef HAVE_INF_ENGINE - if (checkIETarget(DNN_TARGET_CPU)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU)); -#endif - } - if (checkIETarget(DNN_TARGET_MYRIAD)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); -#endif - } - if (checkIETarget(DNN_TARGET_HDDL)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); -#endif - } -#ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) - { - if (checkIETarget(DNN_TARGET_OPENCL)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL)); -#endif - } - if (checkIETarget(DNN_TARGET_OPENCL_FP16)) { -#ifdef HAVE_DNN_NGRAPH - backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16)); -#endif - } - } -#endif -#endif // HAVE_INF_ENGINE - -#ifdef HAVE_WEBNN - if (haveWebnn()) - { - backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU)); - } -#endif // HAVE_WEBNN - -#ifdef HAVE_OPENCL - if (cv::ocl::useOpenCL()) - { - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)); - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)); - } -#endif - - backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); - -#ifdef HAVE_VULKAN - if (haveVulkan()) - backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN)); -#endif - -#ifdef HAVE_CUDA - if (haveCUDA()) - { - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); - } -#endif - } - - BackendsList backends; -}; - - -std::vector< std::pair > getAvailableBackends() -{ - return BackendRegistry::getRegistry().getBackends(); -} - -std::vector getAvailableTargets(Backend be) -{ - if (be == DNN_BACKEND_DEFAULT) - be = (Backend)PARAM_DNN_BACKEND_DEFAULT; -#ifdef HAVE_INF_ENGINE - if (be == DNN_BACKEND_INFERENCE_ENGINE) - be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; -#endif - - std::vector result; - const BackendRegistry::BackendsList all_backends = getAvailableBackends(); - for(BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i ) - { - if (i->first == be) - result.push_back(i->second); - } - return result; -} - -//================================================================================================== - -namespace -{ - struct LayerShapes - { - ShapesVec in, out, internal; - // No guarantees that layer which support in-place computations - // will be computed in-place (input.data_ptr == output.data_ptr). - // If layer said that it could work in-place and layers after it - // no longer use input blob, we'll set output = input. - bool supportInPlace; - LayerShapes() {supportInPlace = false;} - }; -} - -Mat blobFromImage(InputArray image, double scalefactor, const Size& size, - const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - Mat blob; - blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth); - return blob; -} - -void blobFromImage(InputArray image, OutputArray blob, double scalefactor, - const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - std::vector images(1, image.getMat()); - blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); -} - -Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, - const Scalar& mean, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - Mat blob; - blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); - return blob; -} - -void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, - Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) -{ - CV_TRACE_FUNCTION(); - CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); - if (ddepth == CV_8U) - { - CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); - CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); - } - - std::vector images; - images_.getMatVector(images); - CV_Assert(!images.empty()); - for (size_t i = 0; i < images.size(); i++) - { - Size imgSize = images[i].size(); - if (size == Size()) - size = imgSize; - if (size != imgSize) - { - if(crop) - { - float resizeFactor = std::max(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); - resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); - Rect crop(Point(0.5 * (images[i].cols - size.width), - 0.5 * (images[i].rows - size.height)), - size); - images[i] = images[i](crop); - } - else - resize(images[i], images[i], size, 0, 0, INTER_LINEAR); - } - if(images[i].depth() == CV_8U && ddepth == CV_32F) - images[i].convertTo(images[i], CV_32F); - Scalar mean = mean_; - if (swapRB) - std::swap(mean[0], mean[2]); - - images[i] -= mean; - images[i] *= scalefactor; - } - - size_t nimages = images.size(); - Mat image0 = images[0]; - int nch = image0.channels(); - CV_Assert(image0.dims == 2); - if (nch == 3 || nch == 4) - { - int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); - Mat blob = blob_.getMat(); - Mat ch[4]; - - for(size_t i = 0; i < nimages; i++ ) - { - const Mat& image = images[i]; - CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); - CV_Assert(image.size() == image0.size()); - - for( int j = 0; j < nch; j++ ) - ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); - if(swapRB) - std::swap(ch[0], ch[2]); - split(image, ch); - } - } - else - { - CV_Assert(nch == 1); - int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; - blob_.create(4, sz, ddepth); - Mat blob = blob_.getMat(); - - for(size_t i = 0; i < nimages; i++ ) - { - const Mat& image = images[i]; - CV_Assert(image.depth() == blob_.depth()); - nch = image.channels(); - CV_Assert(image.dims == 2 && (nch == 1)); - CV_Assert(image.size() == image0.size()); - - image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); - } - } -} - -void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) -{ - CV_TRACE_FUNCTION(); - - //A blob is a 4 dimensional matrix in floating point precision - //blob_[0] = batchSize = nbOfImages - //blob_[1] = nbOfChannels - //blob_[2] = height - //blob_[3] = width - CV_Assert(blob_.depth() == CV_32F); - CV_Assert(blob_.dims == 4); - - images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); - - std::vector vectorOfChannels(blob_.size[1]); - for (int n = 0; n < blob_.size[0]; ++n) - { - for (int c = 0; c < blob_.size[1]; ++c) - { - vectorOfChannels[c] = getPlane(blob_, n, c); - } - cv::merge(vectorOfChannels, images_.getMatRef(n)); - } -} - -#ifdef HAVE_OPENCL -class OpenCLBackendWrapper : public BackendWrapper -{ -public: - OpenCLBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) - { - m.copyTo(umat); - host = &m; - hostDirty = false; - } - - OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) - : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) - { - Ptr base = baseBuffer.dynamicCast(); - CV_Assert(!base.empty()); - - host = &m; - - int shape[] = {1, (int)base->umat.total()}; - umat = base->umat.reshape(1, 2, &shape[0]) - .colRange(0, host->total()) - .reshape(1, host->dims, &host->size[0]); - hostDirty = false; - } - - static Ptr create(Mat& m) - { - return Ptr(new OpenCLBackendWrapper(m)); - } - - static Ptr create(const Ptr& baseBuffer, Mat& m) - { - return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); - } - - static std::vector getUMatVector(const std::vector >& wrappers) - { - const int numWrappers = wrappers.size(); - std::vector mats(wrappers.size()); - for (int i = 0; i < numWrappers; ++i) - { - Ptr umatWrapper = wrappers[i].dynamicCast(); - CV_Assert(!umatWrapper.empty()); - umatWrapper->copyToDevice(); - mats[i] = umatWrapper->umat; - } - return mats; - } - - // Replaces all umats in wrappers to specific ones. - static void update(const std::vector >& wrappers, - const std::vector& umats) - { - CV_Assert(wrappers.size() == umats.size()); - for (int i = 0, n = umats.size(); i < n; ++i) - { - Ptr umatWrapper = wrappers[i].dynamicCast(); - CV_Assert(!umatWrapper.empty()); - umatWrapper->umat = umats[i]; - } - } - - ~OpenCLBackendWrapper() {} - - // Copies data from device to a host memory. - virtual void copyToHost() CV_OVERRIDE - { - umat.copyTo(*host); - } - - virtual void setHostDirty() CV_OVERRIDE - { - hostDirty = true; - }; - - void copyToDevice() - { - if (hostDirty) - { - host->copyTo(umat); - hostDirty = false; - } - } - -private: - UMat umat; - Mat* host; - bool hostDirty; -}; -#endif - -struct LayerPin -{ - int lid; - int oid; - - LayerPin(int layerId = -1, int outputId = -1) - : lid(layerId), oid(outputId) {} - - bool valid() const - { - return (lid >= 0 && oid >= 0); - } - - bool equal(const LayerPin &r) const - { - return (lid == r.lid && oid == r.oid); - } - - bool operator<(const LayerPin &r) const - { - return lid < r.lid || (lid == r.lid && oid < r.oid); - } - - bool operator ==(const LayerPin &r) const - { - return lid == r.lid && oid == r.oid; - } -}; - -struct LayerData -{ - LayerData() : id(-1), dtype(CV_32F), skip(false), flag(0) {} - LayerData(int _id, const String &_name, const String &_type, const int &_dtype, LayerParams &_params) - : id(_id), name(_name), type(_type), dtype(_dtype), params(_params), skip(false), flag(0) - { - CV_TRACE_FUNCTION(); - - //add logging info - params.name = name; - params.type = type; - } - - int id; - String name; - String type; - int dtype; // Datatype of output blobs. - LayerParams params; - - std::vector inputBlobsId; - std::set inputLayersId; - std::set requiredOutputs; - std::vector consumers; - std::vector > outputBlobsWrappers; - std::vector > inputBlobsWrappers; - std::vector > internalBlobsWrappers; - -#ifdef HAVE_CUDA - /* output ids which must be transferred to the host in the background - * after the completion of the forward pass of the layer - */ - std::vector cudaD2HBackgroundTransfers; -#endif - - Ptr layerInstance; - std::vector outputBlobs; - std::vector inputBlobs; - std::vector internals; - // Computation nodes of implemented backends (except DEFAULT). - std::map > backendNodes; - // Flag for skip layer computation for specific backend. - bool skip; - - int flag; - - Ptr getLayerInstance() - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - if (layerInstance) - return layerInstance; - - layerInstance = LayerFactory::createLayerInstance(type, params); - if (!layerInstance) - { - CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); - } - - return layerInstance; - } -}; - -//fake layer containing network input blobs -struct DataLayer : public Layer -{ - DataLayer() : Layer() - { - skip = false; - } - - virtual bool supportBackend(int backendId) CV_OVERRIDE - { - return backendId == DNN_BACKEND_OPENCV; - } - - void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE - { - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - // FIXIT: add wrapper without exception suppression - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), - forward_ocl(inputs_arr, outputs_arr, internals_arr)) - - bool isFP16 = outputs_arr.depth() == CV_16S; - - std::vector outputs, internals; - outputs_arr.getMatVector(outputs); - internals_arr.getMatVector(internals); - - for (int i = 0; i < inputsData.size(); ++i) - { - double scale = scaleFactors[i]; - Scalar& mean = means[i]; - - CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); - if (isFP16) - CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); - else - CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); - - bool singleMean = true; - for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) - { - singleMean = mean[j] == mean[j - 1]; - } - - if (singleMean) - { - if (isFP16) - { - Mat input_f32; - inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale); - convertFp16(input_f32, outputs[i]); - } - else - { - inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); - } - } - else - { - for (int n = 0; n < inputsData[i].size[0]; ++n) - { - for (int c = 0; c < inputsData[i].size[1]; ++c) - { - Mat inp = getPlane(inputsData[i], n, c); - Mat out = getPlane(outputs[i], n, c); - if (isFP16) - { - Mat input_f32; - inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale); - convertFp16(input_f32, out); - } - else - { - inp.convertTo(out, CV_32F, scale, -mean[c] * scale); - } - } - } - } - } - } - -#ifdef HAVE_OPENCL - bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) - { - bool isFP16 = outputs_.depth() == CV_16S; - - std::vector outputs; - outputs_.getUMatVector(outputs); - - for (int i = 0; i < inputsData.size(); ++i) - { - Mat inputData = inputsData[i]; - - double scale = scaleFactors[i]; - Scalar& mean = means[i]; - - CV_Assert(mean == Scalar() || inputData.size[1] <= 4); - if (isFP16) - CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); - else - CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); - - bool singleMean = true; - for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j) - { - singleMean = mean[j] == mean[j - 1]; - } - - if (singleMean) - { - if (isFP16) - { - UMat input_i; - inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale); - convertFp16(input_i, outputs[i]); - } - else - { - inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); - } - } - else - { - for (int n = 0; n < inputData.size[0]; ++n) - { - for (int c = 0; c < inputData.size[1]; ++c) - { - Mat inp = getPlane(inputData, n, c); - - std::vector plane(4, Range::all()); - plane[0] = Range(n, n + 1); - plane[1] = Range(c, c + 1); - UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); - - if (isFP16) - { - UMat input_i; - inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale); - convertFp16(input_i, out); - } - else - { - inp.convertTo(out, CV_32F, scale, -mean[c] * scale); - } - } - } - } - } - return true; - } -#endif - - int outputNameToIndex(const String& tgtName) CV_OVERRIDE - { - int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); - return (idx < (int)outNames.size()) ? idx : -1; - } - - void setNames(const std::vector &names) - { - outNames.assign(names.begin(), names.end()); - shapes.clear(); shapes.resize(outNames.size()); - } - - void setInputShape(const String& tgtName, const MatShape& shape) - { - std::vector::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName); - CV_Check(tgtName, it != outNames.end(), "Unknown input"); - int idx = (int)(it - outNames.begin()); - - CV_Assert(idx < (int)shapes.size()); - CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed"); - shapes[idx] = shape; - } - - bool getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const CV_OVERRIDE - { - CV_Assert(inputs.size() == requiredOutputs); - outputs.assign(inputs.begin(), inputs.end()); - return false; - } - - virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE - { - std::vector outputs; - outputs_arr.getMatVector(outputs); - - CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(), - inputsData.size() == outputs.size()); - skip = true; - for (int i = 0; skip && i < inputsData.size(); ++i) - { - if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar()) - skip = false; - } - } - - - std::vector outNames; - std::vector shapes; - // Preprocessing parameters for each network's input. - std::vector scaleFactors; - std::vector means; - std::vector inputsData; - bool skip; -}; - -struct BlobManager -{ -public: - // Increase references counter to layer output. - void addReference(const LayerPin& lp) - { - std::map::iterator it = refCounter.find(lp); - if (it == refCounter.end()) - refCounter[lp] = 1; - else - it->second += 1; - } - - void addReferences(const std::vector& pins) - { - for (int i = 0; i < pins.size(); i++) - { - addReference(pins[i]); - } - } - - // Returns number of references to allocated memory that used in specific - // layer blob. - int numReferences(const LayerPin& lp) - { - std::map::const_iterator mapIt = reuseMap.find(lp); - CV_Assert(mapIt != reuseMap.end()); - LayerPin memHost = mapIt->second; - - std::map::const_iterator refIt = refCounter.find(memHost); - CV_Assert(refIt != refCounter.end()); - return refIt->second; - } - - // Reuse data allocated in inside the blob. - void reuse(const LayerPin& host, const LayerPin& user) - { - CV_Assert(reuseMap.find(user) == reuseMap.end()); - CV_Assert(reuseMap.find(host) != reuseMap.end()); - LayerPin memHost = reuseMap[host]; - reuseMap[user] = memHost; - if (refCounter.find(memHost) != refCounter.end()) - { - std::map::iterator userRefIt = refCounter.find(user); - if (userRefIt != refCounter.end()) - { - refCounter[memHost] += userRefIt->second; - refCounter.erase(userRefIt); - } - else - refCounter[memHost] += 1; - } - } - - // Decrease references counter to allocated memory inside specific blob. - void releaseReference(const LayerPin& lp) - { - std::map::const_iterator mapIt = reuseMap.find(lp); - CV_Assert(mapIt != reuseMap.end()); - - std::map::iterator refIt = refCounter.find(mapIt->second); - CV_Assert(refIt != refCounter.end()); - CV_Assert(refIt->second > 0); - refIt->second -= 1; - } - - void releaseReferences(const std::vector& pins) - { - for (int i = 0; i < pins.size(); i++) - { - releaseReference(pins[i]); - } - } - - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype) - { - if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS) - { - Mat bestBlob; - LayerPin bestBlobPin; - - std::map::const_iterator hostIt; - std::map::const_iterator refIt; - - const int targetTotal = total(shape); - int bestBlobTotal = INT_MAX; - - for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) - { - refIt = refCounter.find(hostIt->first); - // Use only blobs that had references before because if not, - // it might be used as output. - if (refIt != refCounter.end() && refIt->second == 0) - { - const Mat& unusedBlob = hostIt->second; - if (unusedBlob.total() >= targetTotal && - unusedBlob.total() < bestBlobTotal && - unusedBlob.type() == dtype) - { - bestBlobPin = hostIt->first; - bestBlob = unusedBlob; - bestBlobTotal = unusedBlob.total(); - } - } - } - if (!bestBlob.empty()) - { - reuse(bestBlobPin, lp); - dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); - return; - } - } - - { - // if dst already has been allocated with total(shape) elements, - // it won't be recreated and pointer of dst.data remains the same. - dst.create(shape, dtype); - addHost(lp, dst); - } - } - - void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, - std::vector& pinsForInternalBlobs) - { - CV_TRACE_FUNCTION(); - - pinsForInternalBlobs.clear(); - - std::vector& outputBlobs = ld.outputBlobs, - &internalBlobs = ld.internals; - - const ShapesVec& outShapes = layerShapes.out, - internalShapes = layerShapes.internal; - - outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob - internalBlobs.resize(internalShapes.size()); - - CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); - - // Check that layer could work in-place. - bool inPlace = false; - if (layerShapes.supportInPlace) - { - if (ld.inputBlobs.size() == 1) - { - // Get number of references to the input memory. - int numRef = numReferences(ld.inputBlobsId[0]); - // If current layer is one and only customer of this blob. - inPlace = numRef == 1; - } - } - - ShapesVec shapes(outShapes); - shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); - std::vector blobs; - for(int i = 0; i < outputBlobs.size(); i++) - { - blobs.push_back(&outputBlobs[i]); - } - - for(int i = 0; i < internalBlobs.size(); i++) - { - blobs.push_back(&internalBlobs[i]); - if (total(internalShapes[i])) - { - pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); - } - } - - addReferences(pinsForInternalBlobs); - - std::map > idxSizes; - for(int i = 0; i < shapes.size(); i++) - { - idxSizes[total(shapes[i])].push_back(i); - } - - std::map >::reverse_iterator it; - for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) - { - for(int j = 0; j < it->second.size(); j++) - { - int index = it->second[j]; - if (total(shapes[index])) - { - LayerPin blobPin(ld.id, index); - if (index < outShapes.size() && inPlace) - { - CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); - ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); - reuse(ld.inputBlobsId[0], blobPin); - } - else - reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype); - } - } - } - } - - // Clear internal state. Calls before an every reallocation. - void reset() - { - CV_TRACE_FUNCTION(); - - refCounter.clear(); - reuseMap.clear(); - memHosts.clear(); - } - -private: - // Register allocated memory. - void addHost(const LayerPin& lp, const Mat& mat) - { - CV_Assert(memHosts.find(lp) == memHosts.end()); - reuseMap[lp] = lp; - memHosts[lp] = mat; - } - - std::map refCounter; - // Maps pin to origin blob (for whom memory was allocated firstly). - // For origin blobs key == value. - std::map reuseMap; - std::map memHosts; -}; - -static Ptr wrapMat(int backendId, int targetId, cv::Mat& m) -{ - if (backendId == DNN_BACKEND_OPENCV) - { - if (targetId == DNN_TARGET_CPU) - return Ptr(); -#ifdef HAVE_OPENCL - else if (IS_DNN_OPENCL_TARGET(targetId)) - return OpenCLBackendWrapper::create(m); -#endif - else - CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier"); - } - else if (backendId == DNN_BACKEND_HALIDE) - { - CV_Assert(haveHalide()); -#ifdef HAVE_HALIDE - return Ptr(new HalideBackendWrapper(targetId, m)); -#endif // HAVE_HALIDE - } - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; - } - else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { -#ifdef HAVE_DNN_NGRAPH - return Ptr(new NgraphBackendWrapper(targetId, m)); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph"); -#endif - } - else if (backendId == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - return Ptr(new WebnnBackendWrapper(targetId, m)); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); -#endif - } - else if (backendId == DNN_BACKEND_VKCOM) - { - CV_Assert(haveVulkan()); -#ifdef HAVE_VULKAN - return Ptr(new VkComBackendWrapper(m)); -#endif // HAVE_VULKAN - } - else if (backendId == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); - -#ifdef HAVE_CUDA - switch (targetId) - { - case DNN_TARGET_CUDA: - return CUDABackendWrapperFP32::create(m); - case DNN_TARGET_CUDA_FP16: - return CUDABackendWrapperFP16::create(m); - default: - CV_Assert(IS_DNN_CUDA_TARGET(targetId)); - } -#endif - } - else - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - return Ptr(); // TODO Error? -} - -static int g_networkId = 0; - -detail::NetImplBase::NetImplBase() - : networkId(CV_XADD(&g_networkId, 1)) - , networkDumpCounter(0) - , dumpLevel(DNN_NETWORK_DUMP) -{ - // nothing -} - -std::string detail::NetImplBase::getDumpFileNameBase() const -{ - std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); - return dumpFileNameBase; -} - -struct Net::Impl : public detail::NetImplBase -{ - typedef std::map LayersShapesMap; - typedef std::map MapIdToLayerData; - - Impl() - { - //allocate fake net input layer - netInputLayer = Ptr(new DataLayer()); - LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; - inpl.id = 0; - netInputLayer->name = inpl.name = "_input"; - inpl.type = "__NetInputLayer__"; - inpl.layerInstance = netInputLayer; - layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); - - lastLayerId = 0; - netWasAllocated = false; - netWasQuantized = false; - fusion = true; - isAsync = false; - preferableBackend = DNN_BACKEND_DEFAULT; - preferableTarget = DNN_TARGET_CPU; - skipInfEngineInit = false; - hasDynamicShapes = false; - } - - Ptr netInputLayer; - std::vector blobsToKeep; - MapIdToLayerData layers; - std::map layerNameToId; - std::map outputNameToId; // use registerOutput() to populate outputs - BlobManager blobManager; - int preferableBackend; - int preferableTarget; - String halideConfigFile; - bool skipInfEngineInit; - bool hasDynamicShapes; - // Map host data to backend specific wrapper. - std::map > backendWrappers; - - int lastLayerId; - - bool netWasAllocated; - bool netWasQuantized; - bool fusion; - bool isAsync; - std::vector layersTimings; - -#ifdef HAVE_CUDA - struct CudaInfo_t - { - CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_) - : context(std::move(ctxt)), d2h_stream(std::move(d2h_stream_)) { } - cuda4dnn::csl::CSLContext context; - cuda4dnn::csl::Stream d2h_stream; - cuda4dnn::csl::Workspace workspace; - }; - - std::unique_ptr cudaInfo; -#endif - - Ptr wrap(Mat& host) - { - if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) - return Ptr(); - - MatShape shape(host.dims); - for (int i = 0; i < host.dims; ++i) - shape[i] = host.size[i]; - - void* data = host.data; - if (backendWrappers.find(data) != backendWrappers.end()) - { - Ptr baseBuffer = backendWrappers[data]; - if (preferableBackend == DNN_BACKEND_OPENCV) - { -#ifdef HAVE_OPENCL - CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); - return OpenCLBackendWrapper::create(baseBuffer, host); -#else - CV_Error(Error::StsInternal, ""); -#endif - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - { - CV_Assert(haveHalide()); -#ifdef HAVE_HALIDE - return Ptr(new HalideBackendWrapper(baseBuffer, shape)); -#endif - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - { - CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; - } - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - return wrapMat(preferableBackend, preferableTarget, host); - } - else if (preferableBackend == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - return wrapMat(preferableBackend, preferableTarget, host); -#endif - } - else if (preferableBackend == DNN_BACKEND_VKCOM) - { - #ifdef HAVE_VULKAN - return Ptr(new VkComBackendWrapper(baseBuffer, host)); - #endif - } - else if (preferableBackend == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); -#ifdef HAVE_CUDA - switch (preferableTarget) - { - case DNN_TARGET_CUDA: - return CUDABackendWrapperFP32::create(baseBuffer, shape); - case DNN_TARGET_CUDA_FP16: - return CUDABackendWrapperFP16::create(baseBuffer, shape); - default: - CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); - } -#endif - } - else - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - } - - Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); - backendWrappers[data] = wrapper; - return wrapper; - } - -#ifdef HAVE_HALIDE - void compileHalide() - { - CV_TRACE_FUNCTION(); - - CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); - - HalideScheduler scheduler(halideConfigFile); - std::vector< std::reference_wrapper > compileList; compileList.reserve(64); - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - Ptr layer = ld.layerInstance; - if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) - { - CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); - bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); - if (!scheduled) - { - // Use automatic scheduling provided by layer. - layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], - ld.inputBlobs, ld.outputBlobs, - preferableTarget); - } - compileList.emplace_back(ld); - } - } - std::atomic progress(0); - auto fn = ([&] () -> void - { - for (;;) - { - int id = progress.fetch_add(1); - if ((size_t)id >= compileList.size()) - return; - const LayerData& ld = compileList[id].get(); - Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; - dnn::compileHalide(ld.outputBlobs, node, preferableTarget); - } - }); - size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); - num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); - std::vector threads(num_threads - 1); - for (auto& t: threads) t = std::thread(fn); - fn(); // process own tasks - for (auto& t: threads) t.join(); - } -#endif - - void clear() - { - CV_TRACE_FUNCTION(); - - MapIdToLayerData::iterator it; - for (it = layers.begin(); it != layers.end(); it++) - { - if (it->second.id != 0) { - it->second.inputBlobs.clear(); - it->second.outputBlobs.clear(); - it->second.internals.clear(); - } - it->second.skip = false; - //it->second.consumers.clear(); - Ptr currLayer = it->second.layerInstance; - - if( currLayer.empty() ) - continue; - - currLayer->unsetAttached(); - } - netWasAllocated = false; - layersTimings.clear(); - } - - void setUpNet(const std::vector& blobsToKeep_ = std::vector()) - { - CV_TRACE_FUNCTION(); - - if (dumpLevel && networkDumpCounter == 0) - { - dumpNetworkToFile(); - } - - if (preferableBackend == DNN_BACKEND_DEFAULT) - preferableBackend = (Backend)PARAM_DNN_BACKEND_DEFAULT; -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) - preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam(); -#endif - - CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || - preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16); - CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || - preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL); -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - CV_Assert( - (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16 || - preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_FPGA - ); - } -#endif -#ifdef HAVE_WEBNN - if (preferableBackend == DNN_BACKEND_WEBNN) - { - CV_Assert(preferableTarget == DNN_TARGET_CPU || - preferableTarget == DNN_TARGET_OPENCL); - } -#endif - CV_Assert(preferableBackend != DNN_BACKEND_VKCOM || - preferableTarget == DNN_TARGET_VULKAN); - CV_Assert(preferableBackend != DNN_BACKEND_CUDA || - IS_DNN_CUDA_TARGET(preferableTarget)); - if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) - { - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) -#ifndef HAVE_OPENCL - { - CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); - preferableTarget = DNN_TARGET_CPU; - } -#else - { - if (!DNN_OPENCL_ALLOW_ALL_DEVICES) - { - // Current implementation is only valid for GPU (#11494) - if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) - { - CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); - preferableTarget = DNN_TARGET_CPU; - } - else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) - { - CV_LOG_WARNING(NULL, - "DNN: OpenCL target with fp16 precision is not supported " - "with current OpenCL device (tested with Intel GPUs only), " - "switching to OpenCL with fp32 precision."); - preferableTarget = DNN_TARGET_OPENCL; - } - } - } -#endif - if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan()) - { - preferableBackend = DNN_BACKEND_OPENCV; - preferableTarget = DNN_TARGET_CPU; - } - - if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA()) - { -#ifdef HAVE_CUDA - CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU"); -#else - CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU"); -#endif - preferableBackend = DNN_BACKEND_OPENCV; - preferableTarget = DNN_TARGET_CPU; - } - - clear(); - - if (hasDynamicShapes) - { - updateLayersShapes(); - } - - this->blobsToKeep = blobsToKeep_; - - allocateLayers(blobsToKeep_); - - MapIdToLayerData::iterator it = layers.find(0); - CV_Assert(it != layers.end()); - it->second.skip = netInputLayer->skip; - - initBackend(blobsToKeep_); - - if (!netWasAllocated) - { -#ifdef HAVE_HALIDE - if (preferableBackend == DNN_BACKEND_HALIDE) - compileHalide(); -#else - CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); -#endif - } - - netWasAllocated = true; - - if (dumpLevel) - { - dumpNetworkToFile(); - } - } - } - - int getLayerId(const String &layerName) const - { - std::map::const_iterator it = layerNameToId.find(layerName); - return (it != layerNameToId.end()) ? it->second : -1; - } - - int getLayerId(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - return (it != layers.end()) ? id : -1; - } - - int getLayerId(DictValue &layerDesc) const - { - if (layerDesc.isInt()) - return getLayerId(layerDesc.get()); - else if (layerDesc.isString()) - return getLayerId(layerDesc.get()); - - CV_Assert(layerDesc.isInt() || layerDesc.isString()); - return -1; - } - - String getLayerName(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - return (it != layers.end()) ? it->second.name : "(unknown layer)"; - } - - LayerData& getLayerData(int id) const - { - MapIdToLayerData::const_iterator it = layers.find(id); - - if (it == layers.end()) - CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); - - return const_cast(it->second); - } - - LayerData& getLayerData(const String &layerName) const - { - int id = getLayerId(layerName); - - if (id < 0) - CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); - - return getLayerData(id); - } - - LayerData& getLayerData(const DictValue &layerDesc) const - { - CV_Assert(layerDesc.isInt() || layerDesc.isString()); - if (layerDesc.isInt()) - return getLayerData(layerDesc.get()); - else /*if (layerDesc.isString())*/ - return getLayerData(layerDesc.get()); - } - - static void addLayerInput(LayerData &ld, int inNum, LayerPin from) - { - if ((int)ld.inputBlobsId.size() <= inNum) - { - ld.inputBlobsId.resize(inNum + 1); - } - else - { - LayerPin storedFrom = ld.inputBlobsId[inNum]; - if (storedFrom.valid() && !storedFrom.equal(from)) - CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", - inNum, ld.name.c_str())); - } - - ld.inputBlobsId[inNum] = from; - } - - int resolvePinOutputName(LayerData &ld, const String &outName) const - { - if (outName.empty()) - return 0; - return ld.getLayerInstance()->outputNameToIndex(outName); - } - - LayerPin getPinByAlias(const String &layerName) const - { - LayerPin pin; - pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); - - if (pin.lid >= 0) - pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName); - - return pin; - } - - std::vector getLayerOutPins(const String &layerName) const - { - int lid = (layerName.empty()) ? 0 : getLayerId(layerName); - - MapIdToLayerData::const_iterator it = layers.find(lid); - if (it == layers.end()) - CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid)); - const size_t nOutputs = it->second.outputBlobs.size(); - - std::vector pins; - for (int i = 0; i < nOutputs; i++) - { - pins.push_back(LayerPin(lid, i)); - } - - return pins; - } - - // FIXIT remove dtype - int addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) - { - int id = getLayerId(name); - if (id >= 0) - { - if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") - { - CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); - return -1; - } - else - { - LayerData& ld = layers.find(id)->second; - ld.type = type; - ld.params = params; - return -1; - } - } - - id = ++lastLayerId; - layerNameToId.insert(std::make_pair(name, id)); - layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); - if (params.get("has_dynamic_shapes", false)) - hasDynamicShapes = true; - - if (dtype == CV_8S) - netWasQuantized = true; - - return id; - } - - void connect(int outLayerId, int outNum, int inLayerId, int inNum) - { - CV_Assert(outLayerId < inLayerId); - LayerData &ldOut = getLayerData(outLayerId); - LayerData &ldInp = getLayerData(inLayerId); - - addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); - ldOut.requiredOutputs.insert(outNum); - ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); - - CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")"); - } - - int registerOutput(const std::string& outputName, int layerId, int outputPort) - { - int checkLayerId = getLayerId(outputName); - if (checkLayerId >= 0) - { - if (checkLayerId == layerId) - { - if (outputPort == 0) - { - // layer name correlates with its output name - CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked"); - outputNameToId.insert(std::make_pair(outputName, layerId)); - return checkLayerId; - } - } - CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort)); - } -#if 0 // TODO - if (outputPort == 0) - // make alias only, need to adopt getUnconnectedOutLayers() call -#endif - LayerParams outputLayerParams; - outputLayerParams.name = outputName; - outputLayerParams.type = "Identity"; - int dtype = CV_32F; // FIXIT remove - int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams); - connect(layerId, outputPort, outputLayerId, 0); - CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort); - outputNameToId.insert(std::make_pair(outputName, outputLayerId)); - return outputLayerId; - } - - void initBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - if (preferableBackend == DNN_BACKEND_OPENCV) - { - CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - initHalideBackend(); - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { -#ifdef HAVE_DNN_NGRAPH - initNgraphBackend(blobsToKeep_); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Inference Engine + nGraph"); -#endif - } - else if (preferableBackend == DNN_BACKEND_WEBNN) - { -#ifdef HAVE_WEBNN - initWebnnBackend(blobsToKeep_); -#else - CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); -#endif - } - else if (preferableBackend == DNN_BACKEND_VKCOM) - initVkComBackend(); - else if (preferableBackend == DNN_BACKEND_CUDA) - initCUDABackend(blobsToKeep_); - else - CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend)); - } - - void initHalideBackend() - { - CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); - - // Iterator to current layer. - MapIdToLayerData::iterator it = layers.begin(); - // Iterator to base layer for fusion. In example, in case of conv+bn+relu - // it'll be a conv layer. - MapIdToLayerData::iterator baseIt = layers.begin(); - for (; it != layers.end(); it++) - { - LayerData &ldTop = it->second; - Ptr layerTop = ldTop.layerInstance; - if (!layerTop->supportBackend(preferableBackend)) - { - // Move base iterator to layer that don't support preferable - // backend to prevent fusion over layer of different backend. - baseIt = it; - continue; - } - // Try to do layers fusion. - LayerData &ldBot = baseIt->second; - Ptr layerBot = ldBot.layerInstance; - // 1. Check that bottom and top from the same backends. - if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) - { - // 2. Check that current layer works in-place. - bool inPlace = ldTop.inputBlobs.size() == 1 && - ldBot.outputBlobs.size() == 1 && - ldTop.inputBlobs[0]->data == - ldBot.outputBlobs[0].data; - if (inPlace) - { - // 3. Try to attach node. - CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); - Ptr fusedNode = - layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); - if (!fusedNode.empty()) - { - ldTop.skip = true; - ldBot.backendNodes[preferableBackend] = fusedNode; - ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; - continue; - } - } - } - // No layers fusion. - ldTop.skip = false; - ldTop.backendNodes[DNN_BACKEND_HALIDE] = - layerTop->initHalide(ldTop.inputBlobsWrappers); - baseIt = it; - } - } - - -#ifdef HAVE_DNN_NGRAPH - /** mark input pins as outputs from other subnetworks - * FIXIT must be done by DNN engine not ngraph. - */ - void addNgraphOutputs(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type); - - Ptr layerNet; - auto it = ld.backendNodes.find(preferableBackend); - if (it != ld.backendNodes.end()) - { - Ptr node = it->second; - if (!node.empty()) - { - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); CV_Assert(!ieNode->net.empty()); - layerNet = ieNode->net; - } - } - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (layerNet != ieInpNode->net) - { - CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); - ieInpNode->net->addOutput(ieInpNode); - } - } - } - } - - void initNgraphBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, ""); - - Ptr net; - - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) - { - const LayerData& ld = it->second; - if (ld.id == 0) - { - CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || - (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; - outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; - dataPtr->setName(outputName); - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; - dataPtr->setName(outputName); - } - } - } - - if (skipInfEngineInit) - { - Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; - CV_Assert(!node.empty()); - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - - CV_Assert(ieNode->net); - InfEngineNgraphNet& ienet = *ieNode->net; - ienet.reset(); - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - if (ld.id == 0) - { - for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]); - dataPtr->setName(netInputLayer->outNames[i]); - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - auto it = ienet.outputsDesc.find(ld.name); - if (it != ienet.outputsDesc.end()) - { - const InferenceEngine::TensorDesc& descriptor = it->second; - InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name); - dataPtr->setName(ld.name); - } - else - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - dataPtr->setName(ld.name); - } - } - } - ienet.addBlobs(ld.inputBlobsWrappers); - ienet.addBlobs(ld.outputBlobsWrappers); - ld.skip = true; - } - layers[lastLayerId].skip = false; - ienet.init((Target)preferableTarget); - return; - } - - bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || - BackendRegistry::checkIETarget(DNN_TARGET_CPU)); - - // Build Inference Engine networks from sets of layers that support this - // backend. Split a whole model on several Inference Engine networks if - // some of layers are not implemented. - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData& ld = it->second; - - CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ..."); - - if (ld.id == 0 && ld.skip) - { - CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!"); - continue; - } - - bool fused = ld.skip; - Ptr layer = ld.layerInstance; - if (!fused && !layer->supportBackend(preferableBackend)) - { - CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); - bool customizable = ld.id != 0 && supportsCPUFallback; - - // TODO: there is a bug in Myriad plugin with custom layers shape infer. - if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) - { - for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) - { - customizable = ld.inputBlobs[i]->size[0] == 1; - } - } - - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - - if (preferableTarget == DNN_TARGET_OPENCL) - customizable &= ld.type != "Eltwise"; - - if (!customizable) - { - CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!"); - addNgraphOutputs(ld); - net = Ptr(); - layer->preferableTarget = DNN_TARGET_CPU; - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) { - Ptr ieNode = inpNode.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net->addOutput(ieNode); - } - } - continue; - } - } - ld.skip = true; // Initially skip all Inference Engine supported layers. - - // Create a new network if one of inputs from different Inference Engine graph. - std::vector> inputNodes; - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) - if (inputNodes.size() == ld.inputBlobsId.size()) { - break; - } - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr ieInpNode = inpNode.dynamicCast(); - CV_Assert(!ieInpNode.empty()); CV_Assert(!ieInpNode->net.empty()); - if (ieInpNode->net == net && !fused) { - inputNodes.push_back(inpNode); - continue; - } - } - - if (net.empty()) { - net = Ptr(new InfEngineNgraphNet(*this)); - } - - if (!fused) { - std::vector inputNames; - std::vector inputs; - - auto curr_pos = inpLd.consumers.begin(); - auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; - auto cons = curr_pos; - while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != - inpLd.consumers.end()) { - int cons_inp = cons->oid; - Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. - dynamicCast(); - CV_Assert(!inpWrapper.empty()); - auto iter = std::find(inputNames.begin(), inputNames.end(), - inpWrapper->dataPtr->getName()); - if (iter == inputNames.end()) { - inputNames.push_back(inpWrapper->dataPtr->getName()); - inputs.push_back(inpLd.outputBlobs[cons_inp]); - } - curr_pos = cons + 1; - } - - auto inps = net->setInputs(inputs, inputNames); - for (auto& inp : inps) { - inputNodes.emplace_back(Ptr(new InfEngineNgraphNode(inp))); - } - } - } - - Ptr node; - if (!net.empty()) - { - if (fused) - { - bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && - ld.inputBlobs[0]->data == ld.outputBlobs[0].data; - CV_Assert(inPlace); - node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; - ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; - } - } - else { - net = Ptr(new InfEngineNgraphNet(*this)); - } - - if (!fused) - { - CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - int lid = ld.inputBlobsId[i].lid; - int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; - - auto ieInpNode = inputNodes[i].dynamicCast(); - const auto& ngraph_input_node = ieInpNode->node; - CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); - - // Handle parameters from other subnets. Output port is not used in this case - if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && - ngraph_input_node->get_output_size() == 1) - { - inputNodes[i] = Ptr(new InfEngineNgraphNode(ngraph_input_node)); - continue; - } - CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - // FIXIT refactor ".initNgraph()" API to use Output - // WA: use Concat to emulate Identity operation with requested output port - auto oid_node = std::make_shared(ngraph::OutputVector {ngraph_input_node->output(oid)}, 0); - inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); -#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) - inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); -#else - inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false))); -#endif - } - - if (layer->supportBackend(preferableBackend)) - { - CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size()); - node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes); -#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node) - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - node.dynamicCast()->setName(dataPtr->getName()); - } -#else - node.dynamicCast()->setName(layer->name); -#endif - } - else - { - CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type); - node = Ptr(new InfEngineNgraphNode(inputNodes, - ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); - } - } - else if (node.empty()) - { - CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass..."); - continue; - } - - ld.backendNodes[preferableBackend] = node; - - Ptr ieNode = node.dynamicCast(); - CV_Assert(!ieNode.empty()); - ieNode->net = net; - - for (const auto& pin : blobsToKeep_) - { - if (pin.lid == ld.id) - { - ieNode->net->addOutput(ieNode); - break; - } - } - ieNode->net->setNodePtr(&ieNode->node); - - net->addBlobs(ld.inputBlobsWrappers); - net->addBlobs(ld.outputBlobsWrappers); - addNgraphOutputs(ld); - } - - // Initialize all networks. - for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) - { - LayerData &ld = it->second; - auto iter = ld.backendNodes.find(preferableBackend); - if (iter == ld.backendNodes.end()) - continue; - - Ptr& node = iter->second; - if (node.empty()) - continue; - - Ptr ieNode = node.dynamicCast(); - if (ieNode.empty()) - continue; - - CV_Assert(!ieNode->net.empty()); - - if (!ieNode->net->isInitialized()) - { - ieNode->net->addOutput(ieNode); - ieNode->net->createNet((Target)preferableTarget); - ld.skip = false; - } - } - } -#endif // HAVE_DNN_NGRAPH - -#ifdef HAVE_WEBNN - void addWebnnOutputs(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - Ptr layerNet; - auto it = ld.backendNodes.find(preferableBackend); - if (it != ld.backendNodes.end()) - { - Ptr node = it->second; - if (!node.empty()) - { - Ptr webnnNode = node.dynamicCast(); - CV_Assert(!webnnNode.empty()); CV_Assert(!webnnNode->net.empty()); - layerNet = webnnNode->net; - } - } - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr webnnInpNode = inpNode.dynamicCast(); - CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); - if (layerNet != webnnInpNode->net) - { - webnnInpNode->net->addOutput(webnnInpNode->name); - webnnInpNode->net->setUnconnectedNodes(webnnInpNode); - } - } - } - } - - void initWebnnBackend(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn()); - - Ptr net; - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - if (ld.id == 0) - { - CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || - (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; - outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; - wrapper->name = outputName; - } - } - else - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; - wrapper->name = outputName; - } - } - } - - // Build WebNN networks from sets of layers that support this - // backend. Split a whole model on several WebNN networks if - // some of layers are not implemented. - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) - { - LayerData &ld = it->second; - - if (ld.id == 0 && ld.skip) - continue; - - bool fused = ld.skip; - Ptr layer = ld.layerInstance; - if (!fused && !layer->supportBackend(preferableBackend)) - { - // For test use. when not using WebNN, the test case will fail - // with the following code. - CV_LOG_WARNING(NULL, "Layer " + ld.type + " name " + ld.name + " is unsupported by WebNN backend."); - - addWebnnOutputs(ld); - net = Ptr(); - layer->preferableTarget = DNN_TARGET_CPU; - - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) { - Ptr webnnNode = inpNode.dynamicCast(); - CV_Assert(!webnnNode.empty()); - webnnNode->net->setUnconnectedNodes(webnnNode); - } - } - continue; - } - ld.skip = true; // Initially skip all WebNN supported layers. - - // Create a new network if one of inputs from different WebNN graph. - std::vector> inputNodes; - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) - if (inputNodes.size() == ld.inputBlobsId.size()) { - break; - } - LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; - Ptr inpNode = inpLd.backendNodes[preferableBackend]; - if (!inpNode.empty()) - { - Ptr webnnInpNode = inpNode.dynamicCast(); - CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); - if (webnnInpNode->net == net && !fused) { - inputNodes.push_back(inpNode); - continue; - } - } - - if (net.empty()) { - net = Ptr(new WebnnNet()); - } - - if (!fused) { - std::vector inputNames; - std::vector inputs; - - auto curr_pos = inpLd.consumers.begin(); - auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; - auto cons = curr_pos; - while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != - inpLd.consumers.end()) { - int cons_inp = cons->oid; - Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. - dynamicCast(); - CV_Assert(!inpWrapper.empty()); - auto iter = std::find(inputNames.begin(), inputNames.end(), - inpWrapper->name); - if (iter == inputNames.end()) { - inputNames.push_back(inpWrapper->name); - inputs.push_back(inpLd.outputBlobs[cons_inp]); - } - curr_pos = cons + 1; - } - - auto inps = net->setInputs(inputs, inputNames); - for (auto& inp : inps) { - WebnnBackendNode* node = new WebnnBackendNode(inp); - node->net = net; - inputNodes.emplace_back(Ptr(node)); - } - } - } - - Ptr node; - if (!net.empty()) - { - if (fused) - { - bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && - ld.inputBlobs[0]->data == ld.outputBlobs[0].data; - CV_Assert(inPlace); - node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; - ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; - } - } - else { - net = Ptr(new WebnnNet()); - } - - if (!fused) - { - CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) - { - int lid = ld.inputBlobsId[i].lid; - int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; - - auto webnnInpNode = inputNodes[i].dynamicCast(); - inputNodes[i] = Ptr(new WebnnBackendNode(webnnInpNode->operand)); - } - - if (layer->supportBackend(preferableBackend)) - { - if (ld.type == "Const") { - ml::Operand fake_operand; - Ptr fake_input_node = Ptr(new WebnnBackendNode(fake_operand)); - fake_input_node->net = net; - inputNodes.push_back(fake_input_node); - } - node = layer->initWebnn(ld.inputBlobsWrappers, inputNodes); - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); - node.dynamicCast()->name = wrapper->name; - } - } - else - { - continue; - } - } - else if (node.empty()) - continue; - - ld.backendNodes[preferableBackend] = node; - - Ptr webnnNode = node.dynamicCast(); - CV_Assert(!webnnNode.empty()); - webnnNode->net = net; - - if (ld.consumers.empty()) { - // TF EAST_text_detection - webnnNode->net->setUnconnectedNodes(webnnNode); - } - for (const auto& pin : blobsToKeep_) - { - if (pin.lid == ld.id) - { - webnnNode->net->addOutput(webnnNode->name); - break; - } - } - net->addBlobs(ld.inputBlobsWrappers); - net->addBlobs(ld.outputBlobsWrappers); - addWebnnOutputs(ld); - } - - // Initialize all networks. - for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) - { - LayerData &ld = it->second; - auto iter = ld.backendNodes.find(preferableBackend); - if (iter == ld.backendNodes.end()) - continue; - - Ptr& node = iter->second; - if (node.empty()) - continue; - - Ptr webnnNode = node.dynamicCast(); - if (webnnNode.empty()) - continue; - - CV_Assert(!webnnNode->net.empty()); - - if (!webnnNode->net->isInitialized()) - { - webnnNode->net->setUnconnectedNodes(webnnNode); - webnnNode->net->createNet((Target)preferableTarget); - ld.skip = false; - } - } - } -#endif - - void initVkComBackend() - { - CV_TRACE_FUNCTION(); - CV_Assert(preferableBackend == DNN_BACKEND_VKCOM); -#ifdef HAVE_VULKAN - if (!haveVulkan()) - return; - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - { - LayerData &ld = it->second; - Ptr layer = ld.layerInstance; - if (!layer->supportBackend(preferableBackend)) - { - continue; - } - - ld.skip = false; - - try - { - ld.backendNodes[DNN_BACKEND_VKCOM] = - layer->initVkCom(ld.inputBlobsWrappers); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what()); - ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr(); - } - } -#endif - } - - void initCUDABackend(const std::vector& blobsToKeep_) - { - CV_Assert(haveCUDA()); - CV_Assert(preferableBackend == DNN_BACKEND_CUDA); - -#ifdef HAVE_CUDA - if (!cudaInfo) /* we need to check only once */ - cuda4dnn::checkVersions(); - - if (cuda4dnn::getDeviceCount() <= 0) - CV_Error(Error::StsError, "No CUDA capable device found."); - - if (cuda4dnn::getDevice() < 0) - CV_Error(Error::StsError, "No CUDA capable device selected."); - - if (!cuda4dnn::isDeviceCompatible()) - CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); - - if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) - { - CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); - preferableTarget = DNN_TARGET_CUDA; - } - - if (!cudaInfo) - { - cuda4dnn::csl::CSLContext context; - context.stream = cuda4dnn::csl::Stream(true); - context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream); - context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream); - - auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers - cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); - } - - cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any - - for (auto& layer : layers) - { - auto& ld = layer.second; - if (ld.id == 0) - { - for (auto& wrapper : ld.inputBlobsWrappers) - { - auto cudaWrapper = wrapper.dynamicCast(); - cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); - } - } - - for (auto& wrapper : ld.outputBlobsWrappers) - { - auto cudaWrapper = wrapper.dynamicCast(); - cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); - } - } - - for (auto& layer : layers) - { - auto& ld = layer.second; - auto& layerInstance = ld.layerInstance; - - if (!layerInstance->supportBackend(DNN_BACKEND_CUDA)) - { - std::ostringstream os; - os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name - << "\" of type " << ld.type << '\n'; - CV_LOG_INFO(NULL, os.str().c_str()); - continue; - } - - /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */ - auto context = cudaInfo->context; - auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers); - ld.backendNodes[DNN_BACKEND_CUDA] = node; - - auto cudaNode = node.dynamicCast(); - cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes()); - } - - if (blobsToKeep_.size() > 1) - { - for (const auto& pin : blobsToKeep_) - { - LayerData& ld = layers[pin.lid]; - ld.cudaD2HBackgroundTransfers.push_back(pin.oid); - } - } -#endif - } - - void allocateLayer(int lid, const LayersShapesMap& layersShapes) - { - CV_TRACE_FUNCTION(); - - LayerData &ld = layers[lid]; - - //already allocated - if (ld.flag) - return; - - size_t ninputs = ld.inputBlobsId.size(); -#if 0 - printf("layer %s:", ld.name.c_str()); - for (size_t i = 0; i < ninputs; i++) - { - int inp_lid = ld.inputBlobsId[i].lid; - LayerData &inp_ld = layers[inp_lid]; - int inp_outputs = (int)inp_ld.outputBlobs.size(); - std::cout << " " << inp_ld.name << "(" << inp_outputs; - - for( int j = 0; j < inp_outputs; j++ ) - { - std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size; - } - std::cout << ")"; - } - printf("\n"); -#endif - - //determine parent layers - for (size_t i = 0; i < ninputs; i++) - ld.inputLayersId.insert(ld.inputBlobsId[i].lid); - - //allocate parents - for (set::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) - allocateLayer(*i, layersShapes); - - //bind inputs - if (ld.id == 0) // DataLayer - { - ninputs = netInputLayer->inputsData.size(); - ld.inputBlobsWrappers.resize(ninputs); - for (size_t i = 0; i < ninputs; i++) - ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]); - } - else - { - ld.inputBlobs.resize(ninputs); - ld.inputBlobsWrappers.resize(ninputs); - for (size_t i = 0; i < ninputs; i++) - { - LayerPin from = ld.inputBlobsId[i]; - CV_Assert(from.valid()); - CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); - ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; - ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid]; - } - } - - LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); - - CV_Assert(layerShapesIt != layersShapes.end()); - - if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F) - ld.dtype = CV_16S; - - std::vector pinsForInternalBlobs; - blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); - ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); - for (int i = 0; i < ld.outputBlobs.size(); ++i) - ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); - - /* CUDA backend has its own system for internal blobs; we don't need these */ - ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA) ? 0 : ld.internals.size()); - for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i) - ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); - - Ptr layerPtr = ld.getLayerInstance(); - { - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - { - inps[i] = *ld.inputBlobs[i]; - } - layerPtr->finalize(inps, ld.outputBlobs); - layerPtr->preferableTarget = preferableTarget; -#if 0 - std::cout << "\toutputs:"; - size_t noutputs = ld.outputBlobs.size(); - for (size_t j = 0; j < noutputs; j++) - { - std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; - } - std::cout << "\n"; -#endif - } - - // After allocation of layer, we decrease counters to it's input blobs. - blobManager.releaseReferences(ld.inputBlobsId); - blobManager.releaseReferences(pinsForInternalBlobs); - - ld.flag = 1; - } - -#if 0 -#define printf_(args) printf args -#else -#define printf_(args) -#endif - - void fuseLayers(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - - if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && - preferableBackend != DNN_BACKEND_CUDA && - preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) - return; - -#if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return; -#endif - - // scan through all the layers. If there is convolution layer followed by the activation layer, - // we try to embed this activation into the convolution and disable separate execution of the activation - - // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)" - std::set pinsToKeep(blobsToKeep_.begin(), - blobsToKeep_.end()); - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) - { - int lid = it->first; - LayerData& ld = layers[lid]; - if( ld.skip ) - { - printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); - continue; - } - printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); - - // the optimization #1. try to fuse batch norm, scaling and/or activation layers - // with the current layer if they follow it. Normally, the are fused with the convolution layer, - // but some of them (like activation) may be fused with fully-connected, elemwise (+) and - // some other layers. - Ptr& currLayer = ld.layerInstance; - if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 ) - { - LayerData* nextData = &layers[ld.consumers[0].lid]; - LayerPin lpNext(ld.consumers[0].lid, 0); - while (nextData) - { -#ifdef HAVE_INF_ENGINE - if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0) - { - CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type); - break; - } -#endif - /* we use `tryFuse` member of convolution layer to fuse eltwise later - * it's not intended to be fused here; hence, we stop when we encounter eltwise - */ - if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise") - break; - Ptr nextLayer = nextData->layerInstance; - if (currLayer->tryFuse(nextLayer)) - { - printf_(("\tfused with %s\n", nextLayer->name.c_str())); - nextData->skip = true; - ld.outputBlobs = layers[lpNext.lid].outputBlobs; - ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; - if (nextData->consumers.size() == 1) - { - int nextLayerId = nextData->consumers[0].lid; - nextData = &layers[nextLayerId]; - lpNext = LayerPin(nextLayerId, 0); - } - else - { - nextData = 0; - break; - } - } - else - break; - } - - if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) - continue; // Go to the next layer. - - // TODO: OpenCL target support more fusion styles. - if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && - (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && - ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" && - ld.layerInstance->type != "Concat")) ) - continue; - - if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget) - && ld.layerInstance->type != "Convolution" - && ld.layerInstance->type != "Concat") - continue; - - while (nextData) - { - // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh - if (IS_DNN_OPENCL_TARGET(preferableTarget) && - nextData->type != "ReLU" && - nextData->type != "ChannelsPReLU" && - nextData->type != "ReLU6" && - nextData->type != "TanH" && - nextData->type != "Power") - break; - - Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); - if (nextActivLayer.empty()) - break; - - if (currLayer->setActivation(nextActivLayer)) - { - printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); - nextData->skip = true; - ld.outputBlobs = layers[lpNext.lid].outputBlobs; - ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; - if (nextData->consumers.size() == 1) - { - int nextLayerId = nextData->consumers[0].lid; - nextData = &layers[nextLayerId]; - lpNext = LayerPin(nextLayerId, 0); - } - else - { - nextData = 0; - break; - } - } - else - break; - } - - // OpenCL: fuse convolution layer followed by eltwise + relu - // CUDA: fuse convolution layer followed by eltwise (and optional activation) - while (nextData && - (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && - ld.layerInstance->type == "Convolution" - ) // semantic of 'if' - { - Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); - if (nextEltwiseLayer.empty()) - break; - -#ifdef HAVE_CUDA - // CUDA backend supports fusion with eltwise sum (without variable channels) - if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) - { - // we create a temporary backend node for eltwise layer to obtain the eltwise configuration - cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init - const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers); - auto eltwiseNode = node.dynamicCast(); - - // broadcasting not supported in fused ops - auto required_shape = shape(nextData->outputBlobs[0]); - for (int i = 0; i < nextData->inputBlobs.size(); i++) - { - if (shape(*nextData->inputBlobs[i]) != required_shape) - { - eltwiseNode.reset(); - break; - } - } - - // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. - // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. - if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) - break; - } -#endif - - if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0) - break; - if (nextData->inputBlobsId.size() != 2) - break; - - if (IS_DNN_OPENCL_TARGET(preferableTarget)) - { - if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) == "sum") - { - if (nextData->params.has("coeff")) - { - DictValue paramCoeff = nextData->params.get("coeff"); - int n = paramCoeff.size(); - bool isCoeffOneOne = (n == 2); - for (int i = 0; isCoeffOneOne && i < n; i++) - { - float c = paramCoeff.get(i); - isCoeffOneOne &= (c == 1.0f); - } - if (!isCoeffOneOne) - { - CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); - break; - } - } - } - else - { - CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get("operation")); - break; - } - } - - { - LayerData *eltwiseData = nextData; - - // Eltwise layer has two inputs. We need to determine which - // is a base convolution layer and which could be used as it's bias. - LayerData* biasLayerData = 0; - for (int i = 0; i < 2; ++i) - { - LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid]; - CV_Assert(downLayerData); - while (downLayerData->skip) - { - if (downLayerData->inputBlobsId.size() == 1) - downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; - else - { - downLayerData = 0; - break; - } - } - if (downLayerData && ld.id == downLayerData->id) - { - biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid]; - break; - } - } - CV_Assert(biasLayerData); - { - // fuse eltwise + activation layer - // bias must already be computed to fuse => bias layer must appear before convolution - if (biasLayerData->id < ld.id) - { - /* we can fuse activation if: - * => activation layer that follows is the only consumer of eltwise output - * => activation layer does not process multiple inputs - * => we do not require to keep the output of eltwise - */ - Ptr nextFusabeleActivLayer; - if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0) - { - nextData = &layers[eltwiseData->consumers[0].lid]; - lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); - CV_Assert(nextData); - if (nextData->outputBlobs.size() == 1) - nextFusabeleActivLayer = nextData->layerInstance.dynamicCast(); - } - else - { - // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise - nextData = 0; - } - - // the requirements of OCV OpenCL backend and CUDA backend are different - // we need to check them separately; hence, the fuse variables - bool fuse_eltwise = false, fuse_activation = false; - - Ptr activ_power; - if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() && - nextData && - (!nextData->type.compare("ReLU") || - !nextData->type.compare("ChannelsPReLU") || - (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast()) && activ_power->scale == 1.0f) - ) && - currLayer->setActivation(nextFusabeleActivLayer)) - { - fuse_eltwise = true; - fuse_activation = true; - } - - if (IS_DNN_CUDA_TARGET(preferableTarget)) - { - /* supported fusion options: - * => convolution + eltwise - * => activation(convolution) + eltwise - * > convolution + activation would have been fused already; we have to fuse eltwise - * => activation(convolution + eltwise) - * > fuse eltwise and then activation - */ - auto layer = nextEltwiseLayer.staticCast(); - if (currLayer->tryFuse(layer)) - { - fuse_eltwise = true; /* eltwise was successfully fused */ - if (!nextFusabeleActivLayer.empty() && nextData) - { - if ((!nextData->type.compare("ReLU") || - !nextData->type.compare("ReLU6") || - !nextData->type.compare("Power") || - !nextData->type.compare("TanH") || - !nextData->type.compare("Sigmoid") || - !nextData->type.compare("Swish") || - !nextData->type.compare("Mish")) && - currLayer->setActivation(nextFusabeleActivLayer)) - { - // activation was fused - fuse_activation = true; - } - } - } - } - - CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */ - if(fuse_eltwise && fuse_activation) - { - CV_Assert(nextData); - CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); - ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); - printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str())); - eltwiseData->skip = true; - nextData->skip = true; - // This optimization for cases like - // some_layer conv - // | | - // +-- eltwise --+ - // | - // activ - // This way all the element-wise computations - // (i.e. some_layer+conv or some_layer*conv) - // would be done at [conv] layer. So we need to - // replace [conv]'s output blob to [eltwise]'s one - // considering that [activ] is an in-place layer. - // Also we need to move all the consumers' references. - // To prevent memory collisions (i.e. when input of - // [conv] and output of [eltwise] is the same blob) - // we allocate a new blob. - CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); - ld.outputBlobs[0] = ld.outputBlobs[0].clone(); - ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); - - eltwiseData->outputBlobs = ld.outputBlobs; - nextData->outputBlobs = ld.outputBlobs; - eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; - nextData->outputBlobsWrappers = ld.outputBlobsWrappers; - - // Move references of [activ] layer consumers to the newly allocated blob. - for (int i = 0; i < nextData->consumers.size(); ++i) - { - LayerData& consumer = layers[nextData->consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); ++j) - { - if (consumer.inputBlobsId[j].lid == lpNext.lid) - { - consumer.inputBlobs[j] = &ld.outputBlobs[0]; - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } - else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise) - { - CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); - CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); - ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); - eltwiseData->skip = true; - // This optimization is for cases like - // some_layer conv (maybe fused with activ) - // | | - // +-- eltwise --+ - // - // This way all the element-wise computations - // (i.e. some_layer+conv or some_layer*conv) - // would be done at [conv] layer. So we need to - // replace [conv]'s output blob to [eltwise]'s one. - // Also we need to move all the consumers' references. - // To prevent memory collisions (i.e. when input of - // [conv] and output of [eltwise] is the same blob) - // we allocate a new blob. - CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); - ld.outputBlobs[0] = ld.outputBlobs[0].clone(); - ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); - - eltwiseData->outputBlobs = ld.outputBlobs; - eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; - - // Move references of [eltwise] layer consumers to the newly allocated blob. - for (int i = 0; i < eltwiseData->consumers.size(); ++i) - { - LayerData& consumer = layers[eltwiseData->consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); ++j) - { - if (consumer.inputBlobsId[j].lid == eltwiseData->id) - { - consumer.inputBlobs[j] = &ld.outputBlobs[0]; - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } - } - } - } - - break; - } - } - - if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) - continue; // Go to the next layer. - - // the optimization #2. if there is concat layer that concatenates channels - // from the inputs together (i.e. axis == 1) then we make the inputs of - // the concat layer to write to the concatenation output buffer - // (and so we eliminate the concatenation layer, because the channels - // are concatenated implicitly). - Ptr concatLayer = ld.layerInstance.dynamicCast(); - if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 ) - { - Mat& output = ld.outputBlobs[0]; - UMat umat_output; -#ifdef HAVE_OPENCL - if (!ld.outputBlobsWrappers.empty() && - (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))) - { - size_t i, ninputs = ld.inputBlobsId.size(); - bool conv_layer = true; - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = ld.inputBlobsId[i]; - LayerData* inp_i_data = &layers[pin.lid]; - while(inp_i_data->skip && - inp_i_data->inputBlobsId.size() == 1 && - inp_i_data->consumers.size() == 1) - { - pin = inp_i_data->inputBlobsId[0]; - inp_i_data = &layers[pin.lid]; - } - conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution"); - } - if (!conv_layer) - continue; - std::vector umat_outputBlobs; - umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - umat_output = umat_outputBlobs[0]; - } -#endif - - // TODO: in general, this optimization can always be done, but - // many layers currently check that the input/output blobs are - // continuous arrays. Unfortunately, this is not true when - // the concatenation optimization is applied with batch_size > 1. - // so, for now, we only apply this optimization in the most popular - // case batch_size == 1. - int axis = normalize_axis(concatLayer->axis, output.dims); - if( output.total(0, axis) == 1 ) - { - size_t i, ninputs = ld.inputBlobsId.size(); - std::vector realinputs(ninputs); - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = ld.inputBlobsId[i]; - LayerData* inp_i_data = &layers[pin.lid]; - while(inp_i_data->skip && - inp_i_data->inputBlobsId.size() == 1 && - inp_i_data->consumers.size() == 1) - { - pin = inp_i_data->inputBlobsId[0]; - inp_i_data = &layers[pin.lid]; - } - printf_(("\treal input for %s is %s\n", - layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), - inp_i_data->getLayerInstance()->name.c_str())); - - if(inp_i_data->skip || inp_i_data->consumers.size() != 1) - break; -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA && - (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false || - (inp_i_data->layerInstance->type != "Convolution" && - inp_i_data->layerInstance->type != "Pooling" && - inp_i_data->layerInstance->type != "Resize" && - inp_i_data->layerInstance->type != "Flatten" && - inp_i_data->layerInstance->type != "Permute" && - inp_i_data->layerInstance->type != "Reorg" && - inp_i_data->layerInstance->type != "Eltwise" && - inp_i_data->layerInstance.dynamicCast().empty()))) - { - break; - } -#endif - realinputs[i] = pin; - } - - if( i >= ninputs ) - { - // Allocate new memory to prevent collisions during memory - // reusing (see https://github.com/opencv/opencv/pull/10456). - output = output.clone(); -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && - IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umats(1); - umat_output = umat_output.clone(); - umats[0] = umat_output; - OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats); - } -#endif - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - ld.outputBlobsWrappers[0] = wrap(output); -#endif - std::vector chrange(output.dims, Range::all()); - int ofs = 0; - for( i = 0; i < ninputs; i++ ) - { - LayerPin pin = realinputs[i]; - LayerData* inp_i_data = &layers[pin.lid]; - int channels_i = ld.inputBlobs[i]->size[axis]; - chrange[axis] = Range(ofs, ofs + channels_i); - printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), - pin.oid, ofs, ofs + channels_i)); - ofs += channels_i; - Mat output_slice = output(chrange); - Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; - CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); - Mat* oldPtr = &curr_output; - curr_output = output_slice; -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umats(inp_i_data->outputBlobsWrappers.size()); - umats[pin.oid] = umat_output(chrange); - OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats); - } -#endif -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - { - auto cuda_wrapper = wrap(output).dynamicCast(); - auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims); - auto new_shape = shape(output_slice); - cuda_wrapper->update(new_shape, offset); - inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast(); - } -#endif - // Layers that refer old input Mat will refer to the - // new data but the same Mat object. - CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output); - } - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - { - for (int i = 0; i < ld.consumers.size(); i++) - { - LayerData& consumer = layers[ld.consumers[i].lid]; - for (int j = 0; j < consumer.inputBlobsId.size(); j++) - { - if (consumer.inputBlobsId[j].lid == ld.id) - { - CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data); - consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; - break; - } - } - } - } -#endif - ld.skip = true; - printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); - } - } - } - } - } - - void allocateLayers(const std::vector& blobsToKeep_) - { - CV_TRACE_FUNCTION(); - - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - it->second.flag = 0; - - CV_Assert(!layers[0].outputBlobs.empty()); - ShapesVec inputShapes; - for(int i = 0; i < layers[0].outputBlobs.size(); i++) - { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); - if (preferableBackend == DNN_BACKEND_OPENCV && - preferableTarget == DNN_TARGET_OPENCL_FP16 && - layers[0].dtype == CV_32F) - { - layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); - } - inputShapes.push_back(shape(inp)); - } - LayersShapesMap layersShapes; - getLayersShapes(inputShapes, layersShapes); - - blobManager.reset(); - backendWrappers.clear(); - - for(auto& layer : layers) - { - auto& ld = layer.second; - ld.inputBlobsWrappers.clear(); - ld.outputBlobsWrappers.clear(); - ld.internalBlobsWrappers.clear(); - } - - // Fake references to input blobs. - for (int i = 0; i < layers[0].outputBlobs.size(); ++i) - blobManager.addReference(LayerPin(0, i)); - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) - { - const LayerData& ld = it->second; - blobManager.addReferences(ld.inputBlobsId); - } - - for (int i = 0; i < blobsToKeep_.size(); i++) - { - blobManager.addReference(blobsToKeep_[i]); - } - - for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) - { - int lid = it->first; - allocateLayer(lid, layersShapes); - } - - layersTimings.resize(lastLayerId + 1, 0); - fuseLayers(blobsToKeep_); - } - - void forwardLayer(LayerData &ld) - { - CV_TRACE_FUNCTION(); - - Ptr layer = ld.layerInstance; - - if( !ld.skip ) - { - TickMeter tm; - tm.start(); - -#ifndef HAVE_VULKAN - std::map >::const_iterator it = ld.backendNodes.find(preferableBackend); -#else - std::map >::iterator it = ld.backendNodes.find(preferableBackend); -#endif - if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) - { - if (isAsync) - CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); - - if (!layer->supportBackend(DNN_BACKEND_OPENCV)) - CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend", - ld.name.c_str(), ld.type.c_str())); - -#ifdef HAVE_OPENCL - if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) - { - std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); - std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers); - layer->forward(umat_inputBlobs, - umat_outputBlobs, - umat_internalBlobs); - if (DNN_CHECK_NAN_INF) - { - bool fail = false; - for (size_t i = 0; i < umat_outputBlobs.size(); ++i) - { - UMat& u = umat_outputBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - if (!checkRange(m)) - { - std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - else if (!checkRange(m, true, NULL, -1e6, 1e6)) - { - std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - } - if (fail) - { - for (size_t i = 0; i < umat_inputBlobs.size(); ++i) - { - UMat& u = umat_inputBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < umat_outputBlobs.size(); ++i) - { - UMat& u = umat_outputBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < umat_internalBlobs.size(); ++i) - { - UMat& u = umat_internalBlobs[i]; - Mat m; - if (u.depth() == CV_16S) // FP16 - convertFp16(u, m); - else - m = u.getMat(ACCESS_READ); - std::cout << "INTERNAL " << i << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl; - } - if (DNN_CHECK_NAN_INF_RAISE_ERROR) - CV_Assert(!fail); - } - } - OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); - } - else -#endif - { - for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i) - { - if (!ld.inputBlobsWrappers[i].empty()) - ld.inputBlobsWrappers[i]->copyToHost(); - } - - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - { - inps[i] = *ld.inputBlobs[i]; - } - layer->forward(inps, ld.outputBlobs, ld.internals); - - if (DNN_CHECK_NAN_INF) - { - bool fail = false; - for (size_t i = 0; i < ld.outputBlobs.size(); ++i) - { - const Mat& m = ld.outputBlobs[i]; - if (!checkRange(m)) - { - std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - else if (!checkRange(m, true, NULL, -1e6, 1e6)) - { - std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; - std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; - fail = true; - } - } - if (fail) - { - for (size_t i = 0; i < ld.inputBlobs.size(); ++i) - { - const Mat* pM = ld.inputBlobs[i]; - if (!pM) - { - std::cout << "INPUT " << i << " is NULL" << std::endl; - continue; - } - const Mat& m = *pM; - std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < ld.outputBlobs.size(); ++i) - { - const Mat& m = ld.outputBlobs[i]; - std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - for (size_t i = 0; i < ld.internals.size(); ++i) - { - const Mat& m = ld.internals[i]; - std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; - if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl; - } - if (DNN_CHECK_NAN_INF_RAISE_ERROR) - CV_Assert(!fail); - } - } - - for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) - { - if (!ld.outputBlobsWrappers[i].empty()) - ld.outputBlobsWrappers[i]->setHostDirty(); - } - } - } - else - { - Ptr node = it->second; - CV_Assert(!node.empty()); - if (preferableBackend == DNN_BACKEND_CUDA) - { - CV_Assert(haveCUDA()); - -#ifdef HAVE_CUDA - Ptr cudaNode = node.dynamicCast(); - CV_Assert(!cudaNode.empty()); - - cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace); - - for (auto id : ld.cudaD2HBackgroundTransfers) - { - auto wrapper = ld.outputBlobsWrappers[id].dynamicCast(); - wrapper->copyToHostInBackground(); - } -#endif - } - else if (preferableBackend == DNN_BACKEND_HALIDE) - { - forwardHalide(ld.outputBlobsWrappers, node); - } -#ifdef HAVE_INF_ENGINE - else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - forwardNgraph(ld.outputBlobsWrappers, node, isAsync); - } -#endif - else if (preferableBackend == DNN_BACKEND_WEBNN) - { - forwardWebnn(ld.outputBlobsWrappers, node, isAsync); - } -#ifdef HAVE_VULKAN - else if (preferableBackend == DNN_BACKEND_VKCOM) - { - try - { - forwardVkCom(ld.outputBlobsWrappers, node); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what()); - it->second = Ptr(); - forwardLayer(ld); - } - } -#endif - else - { - CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); - } - } - - tm.stop(); - int64 t = tm.getTimeTicks(); - layersTimings[ld.id] = (t > 0) ? t : t + 1; // zero for skipped layers only - } - else - { - layersTimings[ld.id] = 0; - } - - ld.flag = 1; - } - - void forwardToLayer(LayerData &ld, bool clearFlags = true) - { - CV_TRACE_FUNCTION(); - - if (clearFlags) - { - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - it->second.flag = 0; - } - - //already was forwarded - if (ld.flag) - return; - - //forward parents - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) - { - LayerData &ld = it->second; - if (ld.flag) - continue; - forwardLayer(ld); - } - - //forward itself - forwardLayer(ld); - -#ifdef HAVE_CUDA - if (preferableBackend == DNN_BACKEND_CUDA) - cudaInfo->context.stream.synchronize(); -#endif - } - - void getQuantizationParams(const Mat& src, std::vector& scales, std::vector& zeropoints) - { - const int qmin = -128; // INT8_MIN - const int qmax = 127; // INT8_MAX - - double rmin, rmax, sc, zp; - cv::minMaxIdx(src, &rmin, &rmax); - - // 0 must be present in the range [rmin, rmax] - rmin = std::min(rmin, 0.0); - rmax = std::max(rmax, 0.0); - - sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin); - zp = qmin - (rmin/sc); - - scales.push_back((float)sc); - zeropoints.push_back((int)std::round(zp)); - } - - void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) - { - CV_CheckGE(id, 0, ""); - CV_CheckLT(id, (int)layers.size(), ""); - LayerData& layerData = layers[id]; - std::vector& inputLayerIds = layerData.inputBlobsId; - LayerShapes& layerShapes = inOutShapes[id]; - - if (id == 0 && layerShapes.in[0].empty()) - { - if (!layerData.outputBlobs.empty()) - { - ShapesVec shapes; - for (int i = 0; i < layerData.outputBlobs.size(); i++) - { - Mat& inp = layerData.outputBlobs[i]; - CV_Assert(!inp.empty()); - shapes.push_back(shape(inp)); - } - layerShapes.in = shapes; - } - else - { - const std::vector& inputShapes = netInputLayer->shapes; - bool none = true; - for (size_t i = 0; i < inputShapes.size(); i++) - { - if (!inputShapes[i].empty()) - { - none = false; - break; - } - } - if (none) - { - layerShapes.out.clear(); - return; - } - else - { - layerShapes.in = inputShapes; - } - } - } - - if (layerShapes.in.empty()) - { - for(int i = 0; i < inputLayerIds.size(); i++) - { - int layerId = inputLayerIds[i].lid; - LayersShapesMap::const_iterator it = - inOutShapes.find(layerId); - if(it == inOutShapes.end() || - it->second.out.empty()) - { - getLayerShapesRecursively(layerId, inOutShapes); - } - const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; - layerShapes.in.push_back(shape); - } - } - const ShapesVec& is = layerShapes.in; - ShapesVec& os = layerShapes.out; - ShapesVec& ints = layerShapes.internal; - int requiredOutputs = layerData.requiredOutputs.size(); - Ptr l = layerData.getLayerInstance(); - CV_Assert(l); - bool layerSupportInPlace = false; - try - { - layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." << - " inputs=" << is.size() << - " outputs=" << os.size() << "/" << requiredOutputs << - " blobs=" << l->blobs.size()); - for (size_t i = 0; i < is.size(); ++i) - { - CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); - } - for (size_t i = 0; i < os.size(); ++i) - { - CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); - } - for (size_t i = 0; i < l->blobs.size(); ++i) - { - CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); - } - CV_LOG_ERROR(NULL, "Exception message: " << e.what()); - throw; - } - layerShapes.supportInPlace = layerSupportInPlace; - - try - { - for (int i = 0; i < ints.size(); i++) - CV_CheckGT(total(ints[i]), 0, ""); - - for (int i = 0; i < os.size(); i++) - CV_CheckGT(total(os[i]), 0, ""); - } - catch (const cv::Exception& e) - { - CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << - " inputs=" << is.size() << - " outputs=" << os.size() << "/" << requiredOutputs << - " blobs=" << l->blobs.size() << - " inplace=" << layerSupportInPlace); - for (size_t i = 0; i < is.size(); ++i) - { - CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); - } - for (size_t i = 0; i < os.size(); ++i) - { - CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); - } - for (size_t i = 0; i < l->blobs.size(); ++i) - { - CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); - } - CV_LOG_ERROR(NULL, "Exception message: " << e.what()); - throw; - } - } - - void getLayersShapes(const ShapesVec& netInputShapes, - LayersShapesMap& inOutShapes) - { - inOutShapes.clear(); - - inOutShapes[0].in = netInputShapes; //insert shape for first input layer - for (MapIdToLayerData::const_iterator it = layers.begin(); - it != layers.end(); it++) - { - getLayerShapesRecursively(it->first, inOutShapes); - } - } - - void getLayerShapes(const ShapesVec& netInputShapes, - const int layerId, - LayerShapes& shapes) - { - LayersShapesMap inOutShapes; - inOutShapes[0].in = netInputShapes; //insert shape for first input layer - getLayerShapesRecursively(layerId, inOutShapes); - shapes = inOutShapes[layerId]; - } - - void updateLayersShapes() - { - CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); - CV_Assert(netInputLayer); - DataLayer& inputLayer = *netInputLayer; - LayerData& inputLayerData = layers[0]; - CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); - CV_Assert(!inputLayerData.outputBlobs.empty()); - ShapesVec inputShapes; - for(int i = 0; i < inputLayerData.outputBlobs.size(); i++) - { - Mat& inp = inputLayerData.outputBlobs[i]; - CV_Assert(!inp.empty()); - if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation - preferableTarget == DNN_TARGET_OPENCL_FP16 && - inputLayerData.dtype == CV_32F) - { - inp.create(inp.dims, inp.size, CV_16S); - } - inputShapes.push_back(shape(inp)); - } - CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); - LayersShapesMap layersShapes; - layersShapes[0].in = inputShapes; - for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) - { - int layerId = it->first; - LayerData& layerData = it->second; - const std::vector& inputLayerIds = layerData.inputBlobsId; - LayerShapes& layerShapes = layersShapes[layerId]; - CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); - if (layerShapes.in.empty()) - { - for(int i = 0; i < inputLayerIds.size(); i++) - { - const LayerPin& inputPin = inputLayerIds[i]; - int inputLayerId = inputPin.lid; - CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); - LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId); - if (inputIt == layersShapes.end() || inputIt->second.out.empty()) - { - getLayerShapesRecursively(inputLayerId, layersShapes); - } - const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; - layerShapes.in.push_back(shape); - } - layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in); - } - CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes")); - CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes")); - CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes")); - } - CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); - } - - LayerPin getLatestLayerPin(const std::vector& pins) const - { - return *std::max_element(pins.begin(), pins.end()); - } - - Mat getBlob(const LayerPin& pin) const - { - CV_TRACE_FUNCTION(); - - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob not found"); - - MapIdToLayerData::const_iterator it = layers.find(pin.lid); - if (it == layers.end()) - CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid)); - - const LayerData &ld = it->second; - if ((size_t)pin.oid >= ld.outputBlobs.size()) - { - CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, " - "the #%d was requested", ld.name.c_str(), - ld.outputBlobs.size(), pin.oid)); - } - if (preferableTarget != DNN_TARGET_CPU) - { - CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); - // Transfer data to CPU if it's require. - ld.outputBlobsWrappers[pin.oid]->copyToHost(); - } - - if (ld.outputBlobs[pin.oid].depth() == CV_16S) - { - Mat output_blob; - convertFp16(ld.outputBlobs[pin.oid], output_blob); - return output_blob; - } - else - return ld.outputBlobs[pin.oid]; - } - - Mat getBlob(String outputName) const - { - return getBlob(getPinByAlias(outputName)); - } - -#ifdef CV_CXX11 - AsyncArray getBlobAsync(const LayerPin& pin) - { - CV_TRACE_FUNCTION(); -#ifdef HAVE_INF_ENGINE - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob not found"); - - LayerData &ld = layers[pin.lid]; - if ((size_t)pin.oid >= ld.outputBlobs.size()) - { - CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " - "the #%d was requested", ld.name.c_str(), - (int)ld.outputBlobs.size(), (int)pin.oid)); - } - if (preferableTarget != DNN_TARGET_CPU) - { - CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); - // Transfer data to CPU if it's require. - ld.outputBlobsWrappers[pin.oid]->copyToHost(); - } - CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - - Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); - return std::move(wrapper->futureMat); -#else - CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required"); -#endif // HAVE_INF_ENGINE - } - - AsyncArray getBlobAsync(String outputName) - { - return getBlobAsync(getPinByAlias(outputName)); - } -#endif // CV_CXX11 - -#ifdef HAVE_INF_ENGINE - static - Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); -#endif - - string dump() const; - - void dumpNetworkToFile() const - { -#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP - string dumpFileNameBase = getDumpFileNameBase(); - string dumpFileName = dumpFileNameBase + ".dot"; - try - { - string dumpStr = dump(); - std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary); - out << dumpStr; - } - catch (const std::exception& e) - { - std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); - out << "Exception: " << e.what() << std::endl; - } - catch (...) - { - std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); - out << "Can't dump: unknown exception" << std::endl; - } -#endif - } -}; - -Net::Net() : impl(new Net::Impl) -{ -} - -#ifdef HAVE_INF_ENGINE -/*static*/ -Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) -{ - CV_TRACE_FUNCTION(); - - CV_TRACE_REGION("register_inputs"); - - std::vector inputsNames; - std::vector inp_shapes; - for (auto& it : ieNet.getInputsInfo()) - { - inputsNames.push_back(it.first); - std::vector dims = it.second->getTensorDesc().getDims(); - inp_shapes.push_back(std::vector(dims.begin(), dims.end())); - } - - Net cvNet; - cvNet.setInputsNames(inputsNames); - - // set empty input to determine input shapes - for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id) - { - cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]); - } - - CV_TRACE_REGION_NEXT("backendNode"); - - Ptr backendNode; - { - auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape{}); - Ptr backendNodeNGraph(new InfEngineNgraphNode(fake_node)); - backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); - backendNode = backendNodeNGraph; - } - - CV_TRACE_REGION_NEXT("register_outputs"); - - auto ngraphFunction = ieNet.getFunction(); - CV_Assert(ngraphFunction); - std::vector< std::shared_ptr > ngraphOperations = ngraphFunction->get_ops(); - - for (auto& it : ieNet.getOutputsInfo()) - { - CV_TRACE_REGION("output"); - const auto& outputName = it.first; - - LayerParams lp; - int lid = cvNet.addLayer(it.first, "", lp); - - LayerData& ld = cvNet.impl->layers[lid]; - - { - Ptr cvLayer(new NgraphBackendLayer(ieNet)); - cvLayer->name = outputName; - cvLayer->type = "_unknown_"; - - auto process_layer = [&](const std::string& name) -> bool - { - CV_TRACE_REGION("ngraph_function"); - for (const auto& op : ngraphOperations) - { - CV_Assert(op); - if (op->get_friendly_name() == name) - { - const std::string typeName = op->get_type_info().name; - cvLayer->type = typeName; - return true; - } - } - return false; - }; - - bool found = process_layer(outputName); - if (!found) - { - auto pos = outputName.rfind('.'); // cut port number: ".0" - if (pos != std::string::npos) - { - std::string layerName = outputName.substr(0, pos); - found = process_layer(layerName); - } - } - if (!found) - CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'"); - - ld.layerInstance = cvLayer; - ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode; - } - - for (int i = 0; i < inputsNames.size(); ++i) - cvNet.connect(0, i, lid, i); - } - - CV_TRACE_REGION_NEXT("finalize"); - - cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); - - cvNet.impl->skipInfEngineInit = true; - return cvNet; -} -#endif // HAVE_INF_ENGINE - -Net Net::readFromModelOptimizer(const String& xml, const String& bin) -{ - CV_TRACE_FUNCTION(); -#ifndef HAVE_INF_ENGINE - CV_UNUSED(xml); CV_UNUSED(bin); - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - InferenceEngine::Core& ie = getCore(""); - InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); - - return Impl::createNetworkFromModelOptimizer(ieNet); -#endif // HAVE_INF_ENGINE -} - -Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!bufferModelConfig.empty()); - CV_Assert(!bufferWeights.empty()); - return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), - bufferWeights.data(), bufferWeights.size()); -} - -Net Net::readFromModelOptimizer( - const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, - const uchar* bufferWeightsPtr, size_t bufferWeightsSize -) -{ - CV_TRACE_FUNCTION(); -#ifndef HAVE_INF_ENGINE - CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); - CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize); - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - InferenceEngine::Core& ie = getCore(""); - - std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); - - InferenceEngine::CNNNetwork ieNet; - try - { - InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); - InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); - - ieNet = ie.ReadNetwork(model, weights_blob); - } - catch (const std::exception& e) - { - CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); - } - - return Impl::createNetworkFromModelOptimizer(ieNet); -#endif // HAVE_INF_ENGINE -} - - -Net::~Net() -{ -} - -int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - return impl->addLayer(name, type, dtype, params); -} - -int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - return addLayer(name, type, CV_32F, params); -} - -int Net::addLayerToPrev(const String &name, const String &type, const int &dtype, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - - int prvLid = impl->lastLayerId; - int newLid = this->addLayer(name, type, dtype, params); - this->connect(prvLid, 0, newLid, 0); - return newLid; -} - -int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) -{ - CV_TRACE_FUNCTION(); - return addLayerToPrev(name, type, CV_32F, params); -} - -void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) -{ - CV_TRACE_FUNCTION(); - - impl->connect(outLayerId, outNum, inpLayerId, inpNum); -} - -void Net::connect(String _outPin, String _inPin) -{ - CV_TRACE_FUNCTION(); - - LayerPin outPin = impl->getPinByAlias(_outPin); - LayerPin inpPin = impl->getPinByAlias(_inPin); - - CV_Assert(outPin.valid() && inpPin.valid()); - - impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); -} - -int Net::registerOutput(const std::string& outputName, int layerId, int outputPort) -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - return impl->registerOutput(outputName, layerId, outputPort); -} - -Mat Net::forward(const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - impl->forwardToLayer(impl->getLayerData(layerName)); - - return impl->getBlob(layerName); -} - -AsyncArray Net::forwardAsync(const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - -#ifdef CV_CXX11 - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - - if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only"); - - impl->isAsync = true; - impl->forwardToLayer(impl->getLayerData(layerName)); - impl->isAsync = false; - - return impl->getBlobAsync(layerName); -#else - CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11"); -#endif // CV_CXX11 -} - -void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) -{ - CV_TRACE_FUNCTION(); - CV_Assert(!empty()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - String layerName = outputName; - - if (layerName.empty()) - { - std::vector layerNames = getLayerNames(); - CV_Assert(!layerNames.empty()); - layerName = layerNames.back(); - } - - std::vector pins(1, impl->getPinByAlias(layerName)); - impl->setUpNet(pins); - impl->forwardToLayer(impl->getLayerData(layerName)); - - LayerPin pin = impl->getPinByAlias(layerName); - LayerData &ld = impl->layers[pin.lid]; - - if (outputBlobs.isUMat()) - { - impl->getBlob(layerName).copyTo(outputBlobs); - } - else if (outputBlobs.isMat()) - { - outputBlobs.assign(impl->getBlob(layerName)); - } - else if (outputBlobs.isMatVector()) - { - if (impl->preferableTarget != DNN_TARGET_CPU) - { - for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) - { - CV_Assert(!ld.outputBlobsWrappers[i].empty()); - ld.outputBlobsWrappers[i]->copyToHost(); - } - } - if (ld.outputBlobs[0].depth() == CV_16S) - { - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec.resize(ld.outputBlobs.size()); - for (int i = 0; i < outputvec.size(); i++) - convertFp16(ld.outputBlobs[i], outputvec[i]); - } - else - { - // Output depth can be CV_32F or CV_8S - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec = ld.outputBlobs; - } - } - else if (outputBlobs.isUMatVector()) - { - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - -#ifdef HAVE_OPENCL - if (impl->preferableBackend == DNN_BACKEND_OPENCV && - IS_DNN_OPENCL_TARGET(impl->preferableTarget)) - { - if (impl->preferableTarget == DNN_TARGET_OPENCL) - outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - else if (impl->preferableTarget == DNN_TARGET_OPENCL_FP16) - { - std::vector out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); - outputvec.resize(out_vec.size()); - for (int i = 0; i < out_vec.size(); i++) - convertFp16(out_vec[i], outputvec[i]); - } - } - else -#endif - { - outputvec.resize(ld.outputBlobs.size()); - for (int i = 0; i < outputvec.size(); ++i) - ld.outputBlobs[i].copyTo(outputvec[i]); - } - } -} - -void Net::forward(OutputArrayOfArrays outputBlobs, - const std::vector& outBlobNames) -{ - CV_TRACE_FUNCTION(); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - std::vector pins; - for (int i = 0; i < outBlobNames.size(); i++) - { - pins.push_back(impl->getPinByAlias(outBlobNames[i])); - } - - impl->setUpNet(pins); - - LayerPin out = impl->getLatestLayerPin(pins); - - impl->forwardToLayer(impl->getLayerData(out.lid)); - - std::vector matvec; - for (int i = 0; i < pins.size(); i++) - { - matvec.push_back(impl->getBlob(pins[i])); - } - - outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector - outputBlobs.assign(matvec); -} - -void Net::forward(std::vector >& outputBlobs, - const std::vector& outBlobNames) -{ - CV_TRACE_FUNCTION(); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - std::vector pins; - for (int i = 0; i < outBlobNames.size(); i++) - { - pins.push_back(impl->getPinByAlias(outBlobNames[i])); - } - - impl->setUpNet(pins); - - LayerPin out = impl->getLatestLayerPin(pins); - - impl->forwardToLayer(impl->getLayerData(out.lid)); - - outputBlobs.resize(outBlobNames.size()); - for (int i = 0; i < outBlobNames.size(); i++) - { - std::vector lp = impl->getLayerOutPins(outBlobNames[i]); - outputBlobs[i].resize(lp.size()); - for (int j = 0; j < lp.size(); j++) - { - outputBlobs[i][j] = impl->getBlob(lp[j]); - } - } -} - -Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) -{ - CV_TRACE_FUNCTION(); - - // Net can be quantized only once. - if (impl->netWasQuantized) - CV_Error(Error::StsBadArg, "Cannot quantize a quantized net"); - - CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S"); - CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S"); - - bool originalFusion = impl->fusion; - int prefBackend = impl->preferableBackend; - int prefTarget = impl->preferableTarget; - - // Disable fusions and use CPU backend to quantize net - setPreferableBackend(DNN_BACKEND_OPENCV); - setPreferableTarget(DNN_TARGET_CPU); - enableFusion(false); - - if (calibData.isMat()) - { - setInput(calibData.getMat()); - } - else if (calibData.isMatVector()) - { - std::vector calibDataVec; - calibData.getMatVector(calibDataVec); - - std::vector inpNames = impl->netInputLayer->outNames; - CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs"); - for (int i = 0; i < calibDataVec.size(); i++) - setInput(calibDataVec[i], inpNames[i]); - } - - std::vector outNames = getUnconnectedOutLayersNames(); - std::vector pins; - for (int i = 0; i < outNames.size(); i++) - pins.push_back(impl->getPinByAlias(outNames[i])); - impl->setUpNet(pins); - - // Compute scales and zeropoints for all the layers - std::vector > scales; - std::vector > zeropoints; - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) - { - LayerData& ld = it->second; - if (!ld.skip) - { - Ptr layer = ld.layerInstance; - std::vector inps(ld.inputBlobs.size()); - for (int i = 0; i < ld.inputBlobs.size(); ++i) - inps[i] = *ld.inputBlobs[i]; - layer->forward(inps, ld.outputBlobs, ld.internals); - } - - std::vector sc; - std::vector zp; - if (ld.type == "TanH") - { - sc.push_back(1.f/128); - zp.push_back(0); - } - else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax") - { - if (ld.params.get("log_softmax", false)) - { - sc.push_back(16.f/256); - zp.push_back(127); - } - else - { - sc.push_back(1.f/256); - zp.push_back(-128); - } - } - else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop") - { - std::vector inp_sc; std::vector inp_zp; - impl->getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp); - sc.assign(ld.outputBlobs.size(), inp_sc[0]); - zp.assign(ld.outputBlobs.size(), inp_zp[0]); - } - else - { - for (int i = 0; i < ld.outputBlobs.size(); i++) - impl->getQuantizationParams(ld.outputBlobs[i], sc, zp); - } - scales.push_back(sc); - zeropoints.push_back(zp); - } - - // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs - // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints - // TODO : Need a different approach. Current solution fails when 2 such layers have the same input layer - for (Impl::MapIdToLayerData::reverse_iterator it = impl->layers.rbegin(); it != impl->layers.rend(); ++it) - { - LayerData& ld = it->second; - // Layers with multiple outputs. Number of outputs is equal to number of inputs - if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || - ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || - ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" || - (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) /* ReLU with negative slope 0 */) - { - for (int i = 0; i < ld.outputBlobs.size(); i++) - { - LayerPin &pin = ld.inputBlobsId[i]; - scales[pin.lid][pin.oid] = scales[ld.id][i]; - zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i]; - } - } - // Layers with multiple inputs and single output. - else if ((ld.type == "Pooling" && toLowerCase(ld.params.get("pool", "max")) == "max") /* Max Pooling */ || - (ld.type == "Eltwise" && toLowerCase(ld.params.get("operation", "sum")) == "max") /* Elementwise max */ || - ld.type == "Concat") - { - for (int i = 0; i < ld.inputBlobsId.size(); i++) - { - LayerPin &pin = ld.inputBlobsId[i]; - scales[pin.lid][pin.oid] = scales[ld.id][0]; - zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0]; - } - } - } - - // Create a new Net and add quantized layers to it. - Net dstNet; - dstNet.impl->netWasQuantized = true; - dstNet.setInputsNames(impl->netInputLayer->outNames); - dstNet.setPreferableBackend(prefBackend); - dstNet.setPreferableTarget(prefTarget); - dstNet.enableFusion(originalFusion); - - for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) - { - LayerData ld = it->second; - if (ld.id == 0) - { - LayerData &quantInpLd = dstNet.impl->layers[0]; - quantInpLd.dtype = inputsDtype; - quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); - quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); - continue; - } - - std::vector inpPins = ld.inputBlobsId; - // Fill input and output scales/zeropoints for the layer - std::vector > inp_out_sc(2); - std::vector > inp_out_zp(2); - for (int i = 0; i < inpPins.size(); i++) - { - LayerPin &pin = inpPins[i]; - inp_out_sc[0].push_back(scales[pin.lid][pin.oid]); - inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]); - } - inp_out_sc[1] = scales[ld.id]; - inp_out_zp[1] = zeropoints[ld.id]; - - // Quantize layer - Ptr layer = ld.layerInstance; - if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params)) - { - ld.type += "Int8"; - ld.dtype = CV_8S; - } - ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size())); - ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size())); - - // Check and add quantize/dequantize node before layer - for (int i = 0; i < inpPins.size(); i++) - { - LayerPin &pin = inpPins[i]; - LayerData &inpLd = dstNet.impl->getLayerData(impl->getLayerName(pin.lid)); - pin.lid = inpLd.id; - if (inpLd.dtype != ld.dtype) - { - String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid) - : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid); - // Check if quantize/dequantize node for the input layer already exists - if (dstNet.impl->getLayerId(layerName) >= 0) - { - pin.lid = dstNet.impl->getLayerId(layerName); - pin.oid = 0; - } - else - { - LayerParams lp; - lp.set("scales", inp_out_sc[0][i]); - lp.set("zeropoints", inp_out_zp[0][i]); - lp.name = layerName; - lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize"; - int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp); - dstNet.connect(pin.lid, pin.oid, newLid, 0); - pin.lid = newLid; pin.oid = 0; - } - } - } - - // Add quantized layer to Net and connect to its inputs. - int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params); - for( int i = 0; i < inpPins.size(); i++ ) - dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i); - - // If the layer is a output layer, add quantize/dequantize node after it based on output's data type. - if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype) - { - LayerParams lp; - lp.set("scales", inp_out_sc[1][0]); - lp.set("zeropoints", inp_out_zp[1][0]); - lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name; - lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize"; - dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp); - } - } - // Restore FP32 Net's backend, target and fusion - setPreferableBackend(prefBackend); - setPreferableTarget(prefTarget); - enableFusion(originalFusion); - return dstNet; -} - -void Net::getInputDetails(std::vector& scales, std::vector& zeropoints) const -{ - if (!impl->netWasQuantized) - CV_Error(Error::StsBadFunc, "Net isn't quantized"); - - LayerParams &lp = impl->layers[0].params; - DictValue sc = lp.get("scales"); - DictValue zp = lp.get("zeropoints"); - - for (int i = 0; i < sc.size(); i++) - { - scales.push_back(sc.get(i)); - zeropoints.push_back(zp.get(i)); - } -} - -void Net::getOutputDetails(std::vector& scales, std::vector& zeropoints) const -{ - if (!impl->netWasQuantized) - CV_Error(Error::StsBadFunc, "Net isn't quantized"); - - std::vector outLayerIds = getUnconnectedOutLayers(); - for (auto &lid : outLayerIds) - { - LayerParams &lp = impl->layers[lid].params; - DictValue sc = lp.get("scales"); - DictValue zp = lp.get("zeropoints"); - - for (int i = 0; i < sc.size(); i++) - { - scales.push_back(sc.get(i)); - zeropoints.push_back(zp.get(i)); - } - } -} - -void Net::setPreferableBackend(int backendId) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG(backendId); - - if (backendId == DNN_BACKEND_DEFAULT) - backendId = (Backend)PARAM_DNN_BACKEND_DEFAULT; - - if (impl->netWasQuantized && backendId != DNN_BACKEND_OPENCV) - { - CV_LOG_WARNING(NULL, "DNN: Only default backend supports quantized networks"); - backendId = DNN_BACKEND_OPENCV; - } - -#ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE) - backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; -#endif - - if( impl->preferableBackend != backendId ) - { - impl->preferableBackend = backendId; - impl->clear(); - } -} - -void Net::setPreferableTarget(int targetId) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG(targetId); - - if (impl->netWasQuantized && targetId != DNN_TARGET_CPU && - targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) - { - CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks"); - targetId = DNN_TARGET_CPU; - } - - if( impl->preferableTarget != targetId ) - { - impl->preferableTarget = targetId; - if (IS_DNN_OPENCL_TARGET(targetId)) - { -#ifndef HAVE_OPENCL -#ifdef HAVE_INF_ENGINE - if (impl->preferableBackend == DNN_BACKEND_OPENCV) -#else - if (impl->preferableBackend == DNN_BACKEND_DEFAULT || - impl->preferableBackend == DNN_BACKEND_OPENCV) -#endif // HAVE_INF_ENGINE - impl->preferableTarget = DNN_TARGET_CPU; -#else - bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); - if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) - impl->preferableTarget = DNN_TARGET_OPENCL; -#endif - } - impl->clear(); - } -} - -void Net::setInputsNames(const std::vector &inputBlobNames) -{ - CV_TRACE_FUNCTION(); - - impl->netInputLayer->setNames(inputBlobNames); -} - -void Net::setInputShape(const String &inputName, const MatShape& shape) -{ - CV_TRACE_FUNCTION(); - - impl->netInputLayer->setInputShape(inputName, shape); -} - -void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; - - LayerPin pin; - pin.lid = 0; - pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name); - - if (!pin.valid()) - CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); - - Mat blob_ = blob.getMat(); // can't use InputArray directly due MatExpr stuff - MatShape blobShape = shape(blob_); - - if (pin.lid == 0) - { - CV_Assert(!impl->netInputLayer.empty()); - const DataLayer& netInputLayer = *impl->netInputLayer.get(); - if (!netInputLayer.shapes.empty()) - { - CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), ""); - const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid]; - if (!inputShapeLimitation.empty()) - { - CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), ""); -#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 - const size_t dims = inputShapeLimitation.size(); - for (size_t dim = 0; dim < dims; dim++) - { - if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1) - continue; // don't limit batch - CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], ""); - } -#endif - } - } - } - - LayerData &ld = impl->layers[pin.lid]; - const int numInputs = std::max(pin.oid+1, (int)ld.requiredOutputs.size()); - ld.outputBlobs.resize(numInputs); - ld.outputBlobsWrappers.resize(numInputs); - impl->netInputLayer->inputsData.resize(numInputs); - impl->netInputLayer->scaleFactors.resize(numInputs); - impl->netInputLayer->means.resize(numInputs); - - MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]); - bool oldShape = prevShape == blobShape; - - blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]); - if (!oldShape) - ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid]; - - if (!ld.outputBlobsWrappers[pin.oid].empty()) - { - ld.outputBlobsWrappers[pin.oid]->setHostDirty(); - } - impl->netInputLayer->scaleFactors[pin.oid] = scalefactor; - impl->netInputLayer->means[pin.oid] = mean; - impl->netWasAllocated = impl->netWasAllocated && oldShape; -} - -Mat Net::getParam(int layer, int numParam) const -{ - LayerData &ld = impl->getLayerData(layer); - std::vector &layerBlobs = ld.getLayerInstance()->blobs; - CV_Assert(numParam < (int)layerBlobs.size()); - return layerBlobs[numParam]; -} - -void Net::setParam(int layer, int numParam, const Mat &blob) -{ - LayerData &ld = impl->getLayerData(layer); - - std::vector &layerBlobs = ld.getLayerInstance()->blobs; - CV_Assert(numParam < (int)layerBlobs.size()); - //we don't make strong checks, use this function carefully - layerBlobs[numParam] = blob; -} - -int Net::getLayerId(const String &layer) const -{ - return impl->getLayerId(layer); -} - -static -string dumpLayerParameterSize(const string& name, const LayerParams& lp) -{ - std::ostringstream out(name, std::ios::ate); - DictValue param = lp.get(name); - switch (param.size()) - { - case 1: out << " : "; break; - case 2: out << " (HxW): "; break; - case 3: out << " (DxHxW): "; break; - default: - CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size())); - out << ": "; - } - for (size_t i = 0; i < param.size(); i++) - { - if (i > 0) - out << " x "; - out << param.get(i); - } - return out.str(); -} - -String Net::dump() -{ - CV_Assert(!empty()); - - bool hasInput = !impl->netInputLayer->inputsData.empty(); - - if (hasInput) - { - if (!impl->netWasAllocated) - impl->setUpNet(); - } - - return impl->dump(); -} - -string Net::Impl::dump() const -{ - bool hasInput = !netInputLayer->inputsData.empty(); - - std::ostringstream out; - const std::map& map = layers; - - Backend prefBackend = (Backend)preferableBackend; - std::vector > skippedLayers; - std::vector skipId; - std::vector allLayers(map.size(), -1); - int idPrev = -1; - Ptr prevNode; - for (std::map::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit) - { - std::map >::const_iterator itBackend = rit->second.backendNodes.find(prefBackend); - if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || - itBackend->second.empty()) - { - if (rit->second.skip) - skipId.push_back(rit->first); - else if (!skipId.empty()) - { - if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty()) - skipId.push_back(rit->first); - else if (idPrev != -1) - skipId.push_back(idPrev); - - std::sort(skipId.begin(), skipId.end()); - for (int i = 0; i < skipId.size(); i++) { - allLayers[skipId[i]] = skippedLayers.size(); - } - skippedLayers.push_back(skipId); - skipId.clear(); - } - } - else - { - if (itBackend->second == prevNode) - skipId.push_back(idPrev); - else if (!skipId.empty()) - { - skipId.push_back(idPrev); - std::sort(skipId.begin(), skipId.end()); - for (int i = 0; i < skipId.size(); i++) { - allLayers[skipId[i]] = skippedLayers.size(); - } - skippedLayers.push_back(skipId); - skipId.clear(); - } - idPrev = rit->first; - prevNode = itBackend->second; - } - } - std::vector colors = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff"}; - string backend; - switch (prefBackend) - { - case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; - case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; - case DNN_BACKEND_INFERENCE_ENGINE: // fallthru - case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru - case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break; - case DNN_BACKEND_OPENCV: backend = "OCV/"; break; - case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; - case DNN_BACKEND_CUDA: backend = "CUDA/"; break; - case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break; - // don't use default: - } - out << "digraph G {\n"; - // Add nodes - for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) - { - const LayerData& ld = it->second; - string name = ld.params.name; - std::vector clusterIds(1, it->first); - if (allLayers[it->first] == -1 && !name.empty()) - { - out << "\t\"" << name << "\" [label=\""; - } - else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0]) - { - continue; - } - else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0] - { - int cluster = allLayers[it->first]; - out << "\t\"" << "cluster_" << cluster << "\" [label=\"{"; - clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster - } - for (int i = 0; i < clusterIds.size(); i++) - { - CV_DbgAssert(map.find(clusterIds[i]) != map.end()); - const LayerParams& lp = map.find(clusterIds[i])->second.params; - if (!lp.name.empty()) { - if (i > 0) { - out << " | "; - } - out << lp.name << "\\n" << lp.type << "\\n"; // align center - if (lp.has("kernel_size")) - { - string kernel = dumpLayerParameterSize("kernel_size", lp); - out << kernel; - out << "\\l"; // align left - } else if (lp.has("kernel_h") && lp.has("kernel_w")) { - DictValue h = lp.get("kernel_h"); - DictValue w = lp.get("kernel_w"); - out << "kernel (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("stride")) { - string stride = dumpLayerParameterSize("stride", lp); - out << stride; - out << "\\l"; // align left - } else if (lp.has("stride_h") && lp.has("stride_w")) { - DictValue h = lp.get("stride_h"); - DictValue w = lp.get("stride_w"); - out << "stride (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("dilation")) { - string dilation = dumpLayerParameterSize("dilation", lp); - out << dilation; - out << "\\l"; // align left - } else if (lp.has("dilation_h") && lp.has("dilation_w")) { - DictValue h = lp.get("dilation_h"); - DictValue w = lp.get("dilation_w"); - out << "dilation (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("pad")) { - DictValue pad = lp.get("pad"); - out << "pad "; - switch (pad.size()) - { - case 1: out << ": " << pad; break; - case 2: - out << "(HxW): (" << pad.get(0) << " x " << pad.get(1) << ")"; - break; - case 4: - out << "(HxW): (" << pad.get(0) << ", " << pad.get(2) - << ") x (" << pad.get(1) << ", " << pad.get(3) << ")"; - break; - case 6: - out << "(DxHxW): (" << pad.get(0) << ", " << pad.get(3) - << ") x (" << pad.get(1) << ", " << pad.get(4) - << ") x (" << pad.get(2) << ", " << pad.get(5) << ")"; - break; - default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size())); - } - out << "\\l"; // align left - } else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) { - DictValue l = lp.get("pad_l"); - DictValue t = lp.get("pad_t"); - DictValue r = lp.get("pad_r"); - DictValue b = lp.get("pad_b"); - out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")"; - out << "\\l"; // align left - } - else if (lp.has("pooled_w") || lp.has("pooled_h")) { - DictValue h = lp.get("pooled_h"); - DictValue w = lp.get("pooled_w"); - out << "pad pooled (HxW): " << h << " x " << w; - out << "\\l"; // align left - } - if (lp.has("pool")) { - out << "pool: " << lp.get("pool"); - out << "\\l"; // align left - } - if (lp.has("global_pooling")) { - out << "global_pooling: " << lp.get("global_pooling"); - out << "\\l"; // align left - } - if (lp.has("group")) { - out << "group: " << lp.get("group"); - out << "\\l"; // align left - } - } - } - if (!ld.outputBlobs.empty()) - { - out << "output: " << ld.outputBlobs[0].size; - out << "\\l"; // align left - } - - Ptr layerBackend; - std::map >::const_iterator ibn = ld.backendNodes.find(prefBackend); - if (ibn != ld.backendNodes.end()) - layerBackend = ibn->second; - out << (!layerBackend.empty() ? backend : "OCV/"); - int colorId = 0; - const Target target = ld.layerInstance.empty() - ? DNN_TARGET_CPU - : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type - switch (target) - { - case DNN_TARGET_CPU: out << "CPU"; colorId = layerBackend.empty() ? 0 : 5; break; - case DNN_TARGET_OPENCL: out << "OCL"; colorId = 1; break; - case DNN_TARGET_OPENCL_FP16: out << "OCL_FP16"; colorId = 2; break; - case DNN_TARGET_MYRIAD: out << "MYRIAD"; colorId = 3; break; - case DNN_TARGET_HDDL: out << "HDDL"; colorId = 8; break; - case DNN_TARGET_VULKAN: out << "VULKAN"; colorId = 7; break; - case DNN_TARGET_FPGA: out << "FPGA"; colorId = 4; break; - case DNN_TARGET_CUDA: out << "CUDA"; colorId = 5; break; - case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break; - // don't use default: - } - CV_Assert(colorId < colors.size()); - out << "\\n"; // align center - out << ((clusterIds.size() == 1)? "\" " : " }\" "); - out << "fillcolor=\"" << colors[colorId] << "\" "; - out << "style=filled "; - out << "shape=" << ((clusterIds.size() == 1)? "box" : "record") << "]\n"; - } - out << '\n'; - // Add edges - int inputsSize = hasInput ? netInputLayer->outNames.size() : 0; - for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) - { - const LayerData& ld = it->second; - if (allLayers[it->first] == -1) // node - { - for (int i = 0; i < ld.consumers.size(); i++) - { - int outId = ld.consumers[i].lid; - if (it == map.begin() && inputsSize > 1) - out << "\t\"" << ld.name << "_" << i << "\"" << " -> "; - else - out << "\t\"" << ld.name << "\"" << " -> "; - if (allLayers[outId] == -1) // node - { - CV_DbgAssert(map.find(outId) != map.end()); - out << "\"" << map.find(outId)->second.name << "\"\n"; - } - else // cluster - { - out << "\"" << "cluster_" << allLayers[outId] << "\"\n"; - } - } - } - else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster - { - for (int i = 0; i < ld.consumers.size(); i++) - { - int outId = ld.consumers[i].lid; - if (allLayers[outId] == -1) // node - { - CV_DbgAssert(map.find(outId) != map.end()); - out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; - out << "\"" << map.find(outId)->second.name << "\"\n"; - } - else if (allLayers[outId] != allLayers[it->first]) { // another cluster - out << "\t\"" << "cluster_" << allLayers[it->first] << "\"" << " -> "; - out << "\"" << "cluster_" << allLayers[outId] << "\"\n"; - } - } - } - } - out << "}\n"; - return out.str(); -} - -void Net::dumpToFile(const String& path) { - std::ofstream file(path.c_str()); - file << dump(); - file.close(); -} - -Ptr Net::getLayer(int layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - return ld.getLayerInstance(); -} -Ptr Net::getLayer(const LayerId& layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - return ld.getLayerInstance(); -} - -std::vector > Net::getLayerInputs(int layerId) const -{ - LayerData &ld = impl->getLayerData(layerId); - - std::vector > inputLayers; - inputLayers.reserve(ld.inputBlobsId.size()); - for (int i = 0; i < ld.inputBlobsId.size(); ++i) { - inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid)); - } - return inputLayers; -} - -std::vector Net::getLayerNames() const -{ - CV_TRACE_FUNCTION(); - - std::vector res; - res.reserve(impl->layers.size()); - - Impl::MapIdToLayerData::const_iterator it; - for (it = impl->layers.begin(); it != impl->layers.end(); it++) - { - if (it->second.id) //skip Data layer - res.push_back(it->second.name); - } - - return res; -} - -bool Net::empty() const -{ - return impl->layers.size() <= 1; //first layer is default Data layer -} - -std::vector Net::getUnconnectedOutLayers() const -{ - CV_TRACE_FUNCTION(); - CV_Assert(impl); - - std::vector layersIds; - - // registerOutput() flow - const std::map& outputNameToId = impl->outputNameToId; - if (!outputNameToId.empty()) - { - for (std::map::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it) - { - layersIds.push_back(it->second); - } - return layersIds; - } - - Impl::MapIdToLayerData::const_iterator it; - for (it = impl->layers.begin(); it != impl->layers.end(); it++) - { - int lid = it->first; - const LayerData &ld = it->second; - - if (ld.requiredOutputs.size() == 0) - layersIds.push_back(lid); - } - - return layersIds; -} - -std::vector Net::getUnconnectedOutLayersNames() const -{ - std::vector ids = getUnconnectedOutLayers(); - const size_t n = ids.size(); - std::vector names(n); - for (size_t i = 0; i < n; ++i) - { - names[i] = impl->layers[ids[i]].name; - } - return names; -} - -void Net::getLayersShapes(const ShapesVec& netInputShapes, - std::vector& layersIds, - std::vector& inLayersShapes, - std::vector& outLayersShapes) const -{ - layersIds.clear(); - inLayersShapes.clear(); - outLayersShapes.clear(); - - Impl::LayersShapesMap inOutShapes; - impl->getLayersShapes(netInputShapes, inOutShapes); - - for(Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); - it != inOutShapes.end(); it++) - { - layersIds.push_back(it->first); - inLayersShapes.push_back(it->second.in); - outLayersShapes.push_back(it->second.out); - } -} - -void Net::getLayersShapes(const MatShape& netInputShape, - std::vector& layerIds, - std::vector& inLayersShapes, - std::vector& outLayersShapes) const -{ - getLayersShapes(ShapesVec(1, netInputShape), - layerIds, inLayersShapes, outLayersShapes); -} - -void Net::getLayerShapes(const MatShape& netInputShape, - const int layerId, - ShapesVec& inLayerShapes, - ShapesVec& outLayerShapes) const -{ - getLayerShapes(ShapesVec(1, netInputShape), - layerId, inLayerShapes, outLayerShapes); - -} - -void Net::getLayerShapes(const ShapesVec& netInputShapes, - const int layerId, - ShapesVec& inLayerShapes, - ShapesVec& outLayerShapes) const -{ - LayerShapes shapes; - impl->getLayerShapes(netInputShapes, layerId, shapes); - inLayerShapes = shapes.in; - outLayerShapes = shapes.out; -} - -int64 Net::getFLOPS(const std::vector& netInputShapes) const -{ - CV_TRACE_FUNCTION(); - - int64 flops = 0; - std::vector ids; - std::vector > inShapes, outShapes; - getLayersShapes(netInputShapes, ids, inShapes, outShapes); - CV_Assert(inShapes.size() == outShapes.size()); - CV_Assert(inShapes.size() == ids.size()); - - for(int i = 0; i < ids.size(); i++) - { - flops += impl->layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], - outShapes[i]); - } - - return flops; -} - -int64 Net::getFLOPS(const MatShape& netInputShape) const -{ - return getFLOPS(std::vector(1, netInputShape)); -} - -int64 Net::getFLOPS(const int layerId, - const std::vector& netInputShapes) const -{ - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); - CV_Assert(layer != impl->layers.end()); - - LayerShapes shapes; - impl->getLayerShapes(netInputShapes, layerId, shapes); - - return const_cast(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out); -} - -int64 Net::getFLOPS(const int layerId, - const MatShape& netInputShape) const -{ - return getFLOPS(layerId, std::vector(1, netInputShape)); -} - -void Net::getLayerTypes(std::vector& layersTypes) const -{ - layersTypes.clear(); - - std::map layers; - for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); - it != impl->layers.end(); it++) - { - if (layers.find(it->second.type) == layers.end()) - layers[it->second.type] = 0; - layers[it->second.type]++; - } - - for (std::map::const_iterator it = layers.begin(); - it != layers.end(); it++) - { - layersTypes.push_back(it->first); - } -} - -int Net::getLayersCount(const String& layerType) const -{ - int count = 0; - for (Impl::MapIdToLayerData::const_iterator it = impl->layers.begin(); - it != impl->layers.end(); it++) - { - if (it->second.type == layerType) - count++; - } - return count; -} - -void Net::getMemoryConsumption(const int layerId, - const std::vector& netInputShapes, - size_t& weights, size_t& blobs) const -{ - CV_TRACE_FUNCTION(); - - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerId); - CV_Assert(layer != impl->layers.end()); - - weights = blobs = 0; - - for(int i = 0; i < layer->second.params.blobs.size(); i++) - { - const Mat& weightsBlob = layer->second.params.blobs[i]; - weights += weightsBlob.total()*weightsBlob.elemSize(); - } - - ShapesVec inLayerShapes, outLayerShapes; - getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes); - size_t elemSize = (impl->netWasQuantized) ? sizeof(char) : sizeof(float); - for(int i = 0; i < outLayerShapes.size(); i++) - { - blobs += total(outLayerShapes[i]) * elemSize; - } -} - -void Net::getMemoryConsumption(const std::vector& netInputShapes, - size_t& weights, size_t& blobs) const -{ - CV_TRACE_FUNCTION(); - - std::vector layerIds; - std::vector w, b; - getMemoryConsumption(netInputShapes, layerIds, w, b); - - weights = blobs = 0; - for(int i = 0; i < layerIds.size(); i++) - { - weights += w[i]; - blobs += b[i]; - } -} - -void Net::getMemoryConsumption(const int layerId, - const MatShape& netInputShape, - size_t& weights, size_t& blobs) const -{ - getMemoryConsumption(layerId, std::vector(1, netInputShape), - weights, blobs); -} - -void Net::getMemoryConsumption(const MatShape& netInputShape, - size_t& weights, size_t& blobs) const -{ - getMemoryConsumption(std::vector(1, netInputShape), - weights, blobs); -} - -void Net::getMemoryConsumption(const std::vector& netInputShapes, - std::vector& layerIds, std::vector& weights, - std::vector& blobs) const -{ - CV_TRACE_FUNCTION(); - - layerIds.clear(); - weights.clear(); - blobs.clear(); - - std::vector > inLayerShapes, outLayerShapes; - - getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); - size_t elemSize = (impl->netWasQuantized) ? sizeof(char) : sizeof(float); - for(int i = 0; i < layerIds.size(); i++) - { - int w = 0, b = 0; - Impl::MapIdToLayerData::const_iterator layer = impl->layers.find(layerIds[i]); - CV_Assert(layer != impl->layers.end()); - - for(int j = 0; j < layer->second.params.blobs.size(); j++) - { - const Mat& weightsBlob = layer->second.params.blobs[j]; - w += weightsBlob.total()*weightsBlob.elemSize(); - } - - for(int j = 0; j < outLayerShapes[i].size(); j++) - { - b += total(outLayerShapes[i][j]) * elemSize; - } - - weights.push_back(w); - blobs.push_back(b); - } -} - -void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, - std::vector& weights, std::vector& blobs) const -{ - getMemoryConsumption(std::vector(1, netInputShape), layerIds, - weights, blobs); -} - -void Net::enableFusion(bool fusion) -{ - if( impl->fusion != fusion ) - { - impl->fusion = fusion; - impl->clear(); - } -} - -void Net::setHalideScheduler(const String& scheduler) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); - - impl->halideConfigFile = scheduler; -} - -int64 Net::getPerfProfile(std::vector& timings) -{ - timings = std::vector(impl->layersTimings.begin() + 1, impl->layersTimings.end()); - int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); - return total; -} - -////////////////////////////////////////////////////////////////////////// - -Layer::Layer() { preferableTarget = DNN_TARGET_CPU; } - -Layer::Layer(const LayerParams ¶ms) - : blobs(params.blobs), name(params.name), type(params.type) -{ - preferableTarget = DNN_TARGET_CPU; -} - -void Layer::setParamsFrom(const LayerParams ¶ms) -{ - blobs = params.blobs; - name = params.name; - type = params.type; -} - -int Layer::inputNameToIndex(String) -{ - return -1; -} - -int Layer::outputNameToIndex(const String&) -{ - return 0; -} - -bool Layer::supportBackend(int backendId) -{ - return backendId == DNN_BACKEND_OPENCV; -} - -Ptr Layer::initCUDA( - void*, - const std::vector>&, - const std::vector>&) -{ - CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initVkCom(const std::vector > &) -{ - CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initHalide(const std::vector > &) -{ - CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initNgraph(const std::vector > & inputs, const std::vector >& nodes) -{ - CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -Ptr Layer::initWebnn(const std::vector > & inputs, const std::vector >& nodes) -{ - CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + - " layers is not defined."); - return Ptr(); -} - -void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, - const std::vector &outputs, int targetId) const -{ -#ifdef HAVE_HALIDE - CV_TRACE_FUNCTION(); - - Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), - xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); - Halide::Func& top = node.dynamicCast()->funcs.back(); - - int outW, outH, outC, outN; - getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); - - if (targetId == DNN_TARGET_CPU) - { - if (outW == 1 && outH == 1) - { - if (outC + outN == 1) - return; - - if (outC > 8) - top.split(c, co, ci, 8) - .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) - .parallel(tile) - .vectorize(ci, 8); - else - top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) - .parallel(tile); - } - else - { - if (outH > 2) - { - top.reorder(x, c, y) - .split(y, yo, yi, 2) - .fuse(yo, n, tile) - .parallel(tile) - .unroll(yi) - .vectorize(x, outW >= 16 ? 16 : outW); - } - } - } - else if (targetId == DNN_TARGET_OPENCL) - { - if (outW == 1 && outH == 1) - { - int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC; - top.split(c, co, ci, c_split) - .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) - .gpu_blocks(tile) - .gpu_threads(ci); - } - else - { - int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; - int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; - // Supported vectorization widths: 2, 3, 4, 8, 16 - int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC); - top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) - .split(c, co, ci, c_split) - .gpu_blocks(xo, yo, co) - .gpu_threads(xi, yi) - .reorder(xi, yi, ci, xo, yo, co) - .vectorize(ci); - } - } - else - CV_Error(Error::StsNotImplemented, "Unknown target identifier"); -#endif // HAVE_HALIDE -} - -Ptr Layer::tryAttach(const Ptr& node) -{ - return Ptr(); -} - -bool Layer::setActivation(const Ptr&) { return false; } -bool Layer::tryFuse(Ptr&) { return false; } -void Layer::getScaleShift(Mat& scale, Mat& shift) const -{ - scale = Mat(); - shift = Mat(); -} - -void Layer::getScaleZeropoint(float& scale, int& zeropoint) const -{ - scale = 1.f; - zeropoint = 0; -} - -void Layer::unsetAttached() -{ - setActivation(Ptr()); -} - -template -static void vecToPVec(const std::vector &v, std::vector &pv) -{ - pv.resize(v.size()); - for (size_t i = 0; i < v.size(); i++) - pv[i] = const_cast(&v[i]); -} - -void Layer::finalize(const std::vector &inputs, std::vector &outputs) -{ - CV_TRACE_FUNCTION(); - this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs); -} - -void Layer::finalize(const std::vector &input, std::vector &output) -{ - CV_UNUSED(input);CV_UNUSED(output); -} - -void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) -{ - CV_TRACE_FUNCTION(); - std::vector inputs, outputs; - inputs_arr.getMatVector(inputs); - outputs_arr.getMatVector(outputs); - - std::vector inputsp; - vecToPVec(inputs, inputsp); - this->finalize(inputsp, outputs); -} - -std::vector Layer::finalize(const std::vector &inputs) -{ - CV_TRACE_FUNCTION(); - - std::vector outputs; - this->finalize(inputs, outputs); - return outputs; -} - -void Layer::forward(std::vector &input, std::vector &output, std::vector &internals) -{ - // We kept this method for compatibility. DNN calls it now only to support users' implementations. -} - -void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); -} - -void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - - if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S) - { - std::vector inputs; - std::vector outputs; - std::vector internals; - - std::vector orig_inputs; - std::vector orig_outputs; - std::vector orig_internals; - - inputs_arr.getUMatVector(orig_inputs); - outputs_arr.getUMatVector(orig_outputs); - internals_arr.getUMatVector(orig_internals); - - inputs.resize(orig_inputs.size()); - for (size_t i = 0; i < orig_inputs.size(); i++) - convertFp16(orig_inputs[i], inputs[i]); - - outputs.resize(orig_outputs.size()); - for (size_t i = 0; i < orig_outputs.size(); i++) - outputs[i].create(shape(orig_outputs[i]), CV_32F); - - internals.resize(orig_internals.size()); - for (size_t i = 0; i < orig_internals.size(); i++) - internals[i].create(shape(orig_internals[i]), CV_32F); - - forward(inputs, outputs, internals); - - for (size_t i = 0; i < outputs.size(); i++) - convertFp16(outputs[i], orig_outputs[i]); - - // sync results back - outputs_arr.assign(orig_outputs); - internals_arr.assign(orig_internals); - return; - } - std::vector inpvec; - std::vector outputs; - std::vector internals; - - inputs_arr.getMatVector(inpvec); - outputs_arr.getMatVector(outputs); - internals_arr.getMatVector(internals); - - std::vector inputs(inpvec.size()); - for (int i = 0; i < inpvec.size(); i++) - inputs[i] = &inpvec[i]; - - this->forward(inputs, outputs, internals); - - // sync results back - outputs_arr.assign(outputs); - internals_arr.assign(internals); -} - -void Layer::run(const std::vector &inputs, std::vector &outputs, std::vector &internals) -{ - CV_TRACE_FUNCTION(); - - this->finalize(inputs, outputs); - this->forward(inputs, outputs, internals); -} - -bool Layer::tryQuantize(const std::vector > &scales, - const std::vector > &zeropoints, LayerParams& params) -{ - return false; -} - -Layer::~Layer() {} - -bool Layer::getMemoryShapes(const std::vector &inputs, - const int requiredOutputs, - std::vector &outputs, - std::vector &internals) const -{ - CV_Assert(inputs.size()); - outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); - return false; -} - -bool Layer::updateMemoryShapes(const std::vector &inputs) -{ - return true; -} -////////////////////////////////////////////////////////////////////////// - -Mutex& getLayerFactoryMutex() -{ - static Mutex* volatile instance = NULL; - if (instance == NULL) - { - cv::AutoLock lock(getInitializationMutex()); - if (instance == NULL) - instance = new Mutex(); - } - return *instance; -} - -static LayerFactory_Impl& getLayerFactoryImpl_() -{ - static LayerFactory_Impl impl; - return impl; -} - -LayerFactory_Impl& getLayerFactoryImpl() -{ - static LayerFactory_Impl* volatile instance = NULL; - if (instance == NULL) - { - cv::AutoLock lock(getLayerFactoryMutex()); - if (instance == NULL) - { - instance = &getLayerFactoryImpl_(); - initializeLayerFactory(); - } - } - return *instance; -} - -void LayerFactory::registerLayer(const String &type, Constructor constructor) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); - - if (it != getLayerFactoryImpl().end()) - { - if (it->second.back() == constructor) - CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered"); - it->second.push_back(constructor); - } - getLayerFactoryImpl().insert(std::make_pair(type, std::vector(1, constructor))); -} - -void LayerFactory::unregisterLayer(const String &type) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - - LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); - if (it != getLayerFactoryImpl().end()) - { - if (it->second.size() > 1) - it->second.pop_back(); - else - getLayerFactoryImpl().erase(it); - } -} - -bool LayerFactory::isLayerRegistered(const std::string& type) -{ - cv::AutoLock lock(getLayerFactoryMutex()); - auto& registeredLayers = getLayerFactoryImpl(); - return registeredLayers.find(type) != registeredLayers.end(); -} - -Ptr LayerFactory::createLayerInstance(const String &type, LayerParams& params) -{ - CV_TRACE_FUNCTION(); - CV_TRACE_ARG_VALUE(type, "type", type.c_str()); - - cv::AutoLock lock(getLayerFactoryMutex()); - LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type); - - if (it != getLayerFactoryImpl().end()) - { - CV_Assert(!it->second.empty()); - return it->second.back()(params); - } - else - { - return Ptr(); //NULL - } -} - -BackendNode::BackendNode(int backendId) : backendId(backendId) {} - -BackendNode::~BackendNode() {}; - -BackendWrapper::BackendWrapper(int backendId, int targetId) - : backendId(backendId), targetId(targetId) {} - -BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) -{ - CV_Error(Error::StsNotImplemented, - "Constructor of backend wrapper must be implemented"); -} - -BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) -{ - CV_Error(Error::StsNotImplemented, - "Constructor of backend wrapper must be implemented"); -} - -BackendWrapper::~BackendWrapper() {} - -Net readNet(const String& _model, const String& _config, const String& _framework) -{ - String framework = toLowerCase(_framework); - String model = _model; - String config = _config; - const std::string modelExt = model.substr(model.rfind('.') + 1); - const std::string configExt = config.substr(config.rfind('.') + 1); - if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || - modelExt == "prototxt" || configExt == "prototxt") - { - if (modelExt == "prototxt" || configExt == "caffemodel") - std::swap(model, config); - return readNetFromCaffe(config, model); - } - if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || - modelExt == "pbtxt" || configExt == "pbtxt") - { - if (modelExt == "pbtxt" || configExt == "pb") - std::swap(model, config); - return readNetFromTensorflow(model, config); - } - if (framework == "torch" || modelExt == "t7" || modelExt == "net" || - configExt == "t7" || configExt == "net") - { - return readNetFromTorch(model.empty() ? config : model); - } - if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || - modelExt == "cfg" || configExt == "cfg") - { - if (modelExt == "cfg" || configExt == "weights") - std::swap(model, config); - return readNetFromDarknet(config, model); - } - if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || - modelExt == "xml" || configExt == "xml") - { - if (modelExt == "xml" || configExt == "bin") - std::swap(model, config); - return readNetFromModelOptimizer(config, model); - } - if (framework == "onnx" || modelExt == "onnx") - { - return readNetFromONNX(model); - } - CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + - model + (config.empty() ? "" : ", " + config)); -} - -Net readNet(const String& _framework, const std::vector& bufferModel, - const std::vector& bufferConfig) -{ - String framework = toLowerCase(_framework); - if (framework == "caffe") - return readNetFromCaffe(bufferConfig, bufferModel); - else if (framework == "tensorflow") - return readNetFromTensorflow(bufferModel, bufferConfig); - else if (framework == "darknet") - return readNetFromDarknet(bufferConfig, bufferModel); - else if (framework == "torch") - CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); - else if (framework == "dldt") - return readNetFromModelOptimizer(bufferConfig, bufferModel); - CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); -} - -Net readNetFromModelOptimizer(const String &xml, const String &bin) -{ - return Net::readFromModelOptimizer(xml, bin); -} - -Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) -{ - return Net::readFromModelOptimizer(bufferCfg, bufferModel); -} - -Net readNetFromModelOptimizer( - const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, - const uchar* bufferWeightsPtr, size_t bufferWeightsSize -) -{ - return Net::readFromModelOptimizer( - bufferModelConfigPtr, bufferModelConfigSize, - bufferWeightsPtr, bufferWeightsSize - ); -} - -CV__DNN_INLINE_NS_END -}} // namespace +// This is a stub file to provide git history information. +// Content has been moved, see PR: https://github.com/opencv/opencv/pull/21662 +// Base commit: 19926e2979ef049a89dd029e2231555db40c2776 +// Original dnn.cpp content: https://github.com/opencv/opencv/blame/19926e2979ef049a89dd029e2231555db40c2776/modules/dnn/src/dnn.cpp diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index 6350a4e168..ae4d9c295e 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -19,7 +19,44 @@ void initializeLayerFactory(); extern bool DNN_DIAGNOSTICS_RUN; extern bool DNN_SKIP_REAL_IMPORT; -namespace detail { +// +// dnn_params.cpp +// + +/// Network dump level +size_t getParam_DNN_NETWORK_DUMP(); + +/// This parameter is useful to run with valgrind memory errors detection +bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS(); + +#ifdef HAVE_OPENCL +bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES(); +#endif + +int getParam_DNN_BACKEND_DEFAULT(); + +// Additional checks (slowdowns execution!) +bool getParam_DNN_CHECK_NAN_INF(); +bool getParam_DNN_CHECK_NAN_INF_DUMP(); +bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR(); + + +inline namespace detail { + +typedef std::vector ShapesVec; + +struct LayerShapes +{ + ShapesVec in, out, internal; + // No guarantees that layer which support in-place computations + // will be computed in-place (input.data_ptr == output.data_ptr). + // If layer said that it could work in-place and layers after it + // no longer use input blob, we'll set output = input. + bool supportInPlace; + LayerShapes() {supportInPlace = false;} +}; + + #define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn)) class NotImplemented : public Layer @@ -82,8 +119,6 @@ struct NetImplBase } // namespace detail -typedef std::vector ShapesVec; - static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string()) { std::ostringstream ss; diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp new file mode 100644 index 0000000000..48e89c6fac --- /dev/null +++ b/modules/dnn/src/dnn_params.cpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "dnn_common.hpp" +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +size_t getParam_DNN_NETWORK_DUMP() +{ + static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0); + return DNN_NETWORK_DUMP; +} + +// this option is useful to run with valgrind memory errors detection +bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS() +{ + static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false); + return DNN_DISABLE_MEMORY_OPTIMIZATIONS; +} + +#ifdef HAVE_OPENCL +bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() +{ + static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false); + return DNN_OPENCL_ALLOW_ALL_DEVICES; +} +#endif + +int getParam_DNN_BACKEND_DEFAULT() +{ + static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", +#ifdef HAVE_INF_ENGINE + (size_t)DNN_BACKEND_INFERENCE_ENGINE +#else + (size_t)DNN_BACKEND_OPENCV +#endif + ); + return PARAM_DNN_BACKEND_DEFAULT; +} + +// Additional checks (slowdowns execution!) +bool getParam_DNN_CHECK_NAN_INF() +{ + static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false); + return DNN_CHECK_NAN_INF; +} +bool getParam_DNN_CHECK_NAN_INF_DUMP() +{ + static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); + return DNN_CHECK_NAN_INF_DUMP; +} +bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR() +{ + static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); + return DNN_CHECK_NAN_INF_RAISE_ERROR; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/dnn_read.cpp b/modules/dnn/src/dnn_read.cpp new file mode 100644 index 0000000000..931170722b --- /dev/null +++ b/modules/dnn/src/dnn_read.cpp @@ -0,0 +1,93 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Net readNet(const String& _model, const String& _config, const String& _framework) +{ + String framework = toLowerCase(_framework); + String model = _model; + String config = _config; + const std::string modelExt = model.substr(model.rfind('.') + 1); + const std::string configExt = config.substr(config.rfind('.') + 1); + if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || modelExt == "prototxt" || configExt == "prototxt") + { + if (modelExt == "prototxt" || configExt == "caffemodel") + std::swap(model, config); + return readNetFromCaffe(config, model); + } + if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || modelExt == "pbtxt" || configExt == "pbtxt") + { + if (modelExt == "pbtxt" || configExt == "pb") + std::swap(model, config); + return readNetFromTensorflow(model, config); + } + if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net") + { + return readNetFromTorch(model.empty() ? config : model); + } + if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg") + { + if (modelExt == "cfg" || configExt == "weights") + std::swap(model, config); + return readNetFromDarknet(config, model); + } + if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || modelExt == "xml" || configExt == "xml") + { + if (modelExt == "xml" || configExt == "bin") + std::swap(model, config); + return readNetFromModelOptimizer(config, model); + } + if (framework == "onnx" || modelExt == "onnx") + { + return readNetFromONNX(model); + } + CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? "" : ", " + config)); +} + +Net readNet(const String& _framework, const std::vector& bufferModel, + const std::vector& bufferConfig) +{ + String framework = toLowerCase(_framework); + if (framework == "caffe") + return readNetFromCaffe(bufferConfig, bufferModel); + else if (framework == "tensorflow") + return readNetFromTensorflow(bufferModel, bufferConfig); + else if (framework == "darknet") + return readNetFromDarknet(bufferConfig, bufferModel); + else if (framework == "torch") + CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); + else if (framework == "dldt") + return readNetFromModelOptimizer(bufferConfig, bufferModel); + CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); +} + +Net readNetFromModelOptimizer(const String& xml, const String& bin) +{ + return Net::readFromModelOptimizer(xml, bin); +} + +Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) +{ + return Net::readFromModelOptimizer(bufferCfg, bufferModel); +} + +Net readNetFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize) +{ + return Net::readFromModelOptimizer( + bufferModelConfigPtr, bufferModelConfigSize, + bufferWeightsPtr, bufferWeightsSize); +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp new file mode 100644 index 0000000000..aa4a6eadf1 --- /dev/null +++ b/modules/dnn/src/dnn_utils.cpp @@ -0,0 +1,158 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Mat blobFromImage(InputArray image, double scalefactor, const Size& size, + const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth); + return blob; +} + +void blobFromImage(InputArray image, OutputArray blob, double scalefactor, + const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + std::vector images(1, image.getMat()); + blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); +} + +Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size, + const Scalar& mean, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + Mat blob; + blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth); + return blob; +} + +void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor, + Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth) +{ + CV_TRACE_FUNCTION(); + CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U"); + if (ddepth == CV_8U) + { + CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth"); + CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); + } + + std::vector images; + images_.getMatVector(images); + CV_Assert(!images.empty()); + for (size_t i = 0; i < images.size(); i++) + { + Size imgSize = images[i].size(); + if (size == Size()) + size = imgSize; + if (size != imgSize) + { + if (crop) + { + float resizeFactor = std::max(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR); + Rect crop(Point(0.5 * (images[i].cols - size.width), + 0.5 * (images[i].rows - size.height)), + size); + images[i] = images[i](crop); + } + else + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + } + if (images[i].depth() == CV_8U && ddepth == CV_32F) + images[i].convertTo(images[i], CV_32F); + Scalar mean = mean_; + if (swapRB) + std::swap(mean[0], mean[2]); + + images[i] -= mean; + images[i] *= scalefactor; + } + + size_t nimages = images.size(); + Mat image0 = images[0]; + int nch = image0.channels(); + CV_Assert(image0.dims == 2); + if (nch == 3 || nch == 4) + { + int sz[] = { (int)nimages, nch, image0.rows, image0.cols }; + blob_.create(4, sz, ddepth); + Mat blob = blob_.getMat(); + Mat ch[4]; + + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 3 || nch == 4)); + CV_Assert(image.size() == image0.size()); + + for (int j = 0; j < nch; j++) + ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j)); + if (swapRB) + std::swap(ch[0], ch[2]); + split(image, ch); + } + } + else + { + CV_Assert(nch == 1); + int sz[] = { (int)nimages, 1, image0.rows, image0.cols }; + blob_.create(4, sz, ddepth); + Mat blob = blob_.getMat(); + + for (size_t i = 0; i < nimages; i++) + { + const Mat& image = images[i]; + CV_Assert(image.depth() == blob_.depth()); + nch = image.channels(); + CV_Assert(image.dims == 2 && (nch == 1)); + CV_Assert(image.size() == image0.size()); + + image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0))); + } + } +} + +void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) +{ + CV_TRACE_FUNCTION(); + + // A blob is a 4 dimensional matrix in floating point precision + // blob_[0] = batchSize = nbOfImages + // blob_[1] = nbOfChannels + // blob_[2] = height + // blob_[3] = width + CV_Assert(blob_.depth() == CV_32F); + CV_Assert(blob_.dims == 4); + + images_.create(cv::Size(1, blob_.size[0]), blob_.depth()); + + std::vector vectorOfChannels(blob_.size[1]); + for (int n = 0; n < blob_.size[0]; ++n) + { + for (int c = 0; c < blob_.size[1]; ++c) + { + vectorOfChannels[c] = getPlane(blob_, n, c); + } + cv::merge(vectorOfChannels, images_.getMatRef(n)); + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 44cd074de4..d2bb2f189c 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -80,7 +80,7 @@ class NgraphCustomOp: public ngraph::op::Op { public: const ngraph::NodeTypeInfo& get_type_info() const override { - static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, 0}; + static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, static_cast(0)}; return type_info; } diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 55ed1e5d17..6979d1864d 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -92,6 +92,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer); CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer); CV_DNN_REGISTER_LAYER_CLASS(PSROIPooling, PoolingLayer); + CV_DNN_REGISTER_LAYER_CLASS(Reduce, ReduceLayer); CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer); CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer); CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer); @@ -129,6 +130,8 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(HardSwish, HardSwishLayer); CV_DNN_REGISTER_LAYER_CLASS(Sin, SinLayer); CV_DNN_REGISTER_LAYER_CLASS(Sinh, SinhLayer); + CV_DNN_REGISTER_LAYER_CLASS(Sign, SignLayer); + CV_DNN_REGISTER_LAYER_CLASS(Shrink, ShrinkLayer); CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer); CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer); CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer); @@ -143,6 +146,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer); CV_DNN_REGISTER_LAYER_CLASS(Const, ConstLayer); CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer); + CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer); CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer); CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer); @@ -175,6 +179,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(ReduceInt8, ReduceLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(EltwiseInt8, EltwiseLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(BatchNormInt8, BatchNormLayerInt8); CV_DNN_REGISTER_LAYER_CLASS(ScaleInt8, ScaleLayerInt8); diff --git a/modules/dnn/src/int8layers/batch_norm_layer.cpp b/modules/dnn/src/int8layers/batch_norm_layer.cpp index c5b8c3d9e9..a3a9ebb261 100644 --- a/modules/dnn/src/int8layers/batch_norm_layer.cpp +++ b/modules/dnn/src/int8layers/batch_norm_layer.cpp @@ -4,6 +4,8 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" + #include namespace cv @@ -103,6 +105,11 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + return true; + } + return backendId == DNN_BACKEND_OPENCV; } @@ -116,6 +123,121 @@ public: return false; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + const int numChannels = (int)origin_bias.total(); + Mat tvGamma = origin_weights.reshape(1, numChannels); + Mat tvBeta = origin_bias.reshape(1, numChannels); + + std::vector inputsIndex; + std::vector outputsIndex; + + Mat tvMean = Mat::zeros(1, numChannels, CV_32F); + tvMean = tvMean.reshape(1, numChannels); + Mat tvVar = Mat::ones(1, numChannels, CV_32F); + tvVar = tvVar.reshape(1, numChannels); + + CV_Assert(inputsWrapper.size() == 1); + if (outputsWrapper.size() > 1) + return Ptr(); + + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + Mat tmpInput = inputWrapper->getMat(); + + if (tmpInput.dims != 4) // Only support 4 dim input. + return Ptr(); + + int input_index = -1, mean_index = -1, var_index = -1, gamma_index = -1, beta_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // Mean tensor + Ptr meanWrapper = Ptr(new TimVXBackendWrapper(tvMean)); + Ptr meanQuant; + meanWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT); + mean_index = tvGraph->addWrapper(meanWrapper); + inputsIndex.push_back(mean_index); + + // Var tensor + Ptr varWrapper = Ptr(new TimVXBackendWrapper(tvVar)); + varWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + var_index = tvGraph->addWrapper(varWrapper); + inputsIndex.push_back(var_index); + + // Gamma tensor + Ptr gammaWrapper = Ptr(new TimVXBackendWrapper(tvGamma)); + gammaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + gamma_index = tvGraph->addWrapper(gammaWrapper); + inputsIndex.push_back(gamma_index); + + // Beta tensor + Ptr betaWrapper = Ptr(new TimVXBackendWrapper(tvBeta)); + betaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT); + beta_index = tvGraph->addWrapper(betaWrapper); + inputsIndex.push_back(beta_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvBatchNorm = graph->CreateOperation(0.f); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvBatchNorm, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp index ea29610222..45aaa3bc19 100644 --- a/modules/dnn/src/int8layers/convolution_layer.cpp +++ b/modules/dnn/src/int8layers/convolution_layer.cpp @@ -9,6 +9,7 @@ #include "opencv2/core/hal/hal.hpp" #include "opencv2/core/hal/intrin.hpp" +#include "../op_timvx.hpp" #include #include @@ -46,6 +47,7 @@ public: int ngroups = params.get("group", 1); CV_Assert(numOutput % ngroups == 0); + input_sc = params.get("input_scale"); input_zp = params.get("input_zeropoint"); output_zp = params.get("zeropoints"); output_sc = params.get("scales"); @@ -181,6 +183,16 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { size_t ksize = kernel_size.size(); + +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX) + { + /* only Conv1d and Conv2d supported. */ + if (ksize == 2 || ksize == 1) + return true; + return false; + } +#endif // Only default backend and Conv1D/Conv2D/Conv3D are supported return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3; } @@ -261,6 +273,11 @@ public: bool setActivation(const Ptr& layer) CV_OVERRIDE { + // TODO! add activation in convolution. +#ifdef HAVE_TIMVX + if (preferableTarget == DNN_TARGET_NPU) + return false; +#endif Ptr activ_int8 = layer.dynamicCast(); if (!activ_int8.empty()) { @@ -300,6 +317,249 @@ public: outputMultiplier[outCn] = outputMultiplier[outCn+1] = outputMultiplier[outCn-1]; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + /* TODO :support GroupConv; + Ref: + https://github.com/VeriSilicon/TIM-VX/blob/main/docs/Operators.md#conv2d + Link Reference: https://github.com/VeriSilicon/TIM-VX/blob/main/src/tim/vx/ops/conv1d_test.cc + */ + + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + Mat tvWeightMat = blobs[0]; + + std::vector tvBiasVec; + tvBiasVec.assign(biasvec.begin(), biasvec.end() - 2); + Mat tvBiasMat(tvBiasVec); + + for (int i = 0; i < numOutput; i++) + { + tvBiasVec[i] += input_zp * (cv::sum(blobs[0].row(i))[0]); + } + + // Padding Type + tim::vx::PadType tvPadType; + + if (padMode.empty()) + { + tvPadType = tim::vx::PadType::AUTO; // TODO! check the padding type. + } + else if(padMode == "VALID") + { + tvPadType = tim::vx::PadType::VALID; + } + else if (padMode == "SAME") + { + tvPadType = tim::vx::PadType::SAME; + } + else + { + CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!"); + } + + size_t ksize = kernel_size.size(); + + std::vector inputsIndex; + std::vector outputsIndex; + + CV_Assert(inputsWrapper.size() == 1); + CV_Assert(ksize == 2 || ksize == 1); + + std::vector weight_scs, bias_scs; + std::vector weight_zps, bias_zps; + + weight_scs.resize(numOutput); + bias_scs.resize(numOutput); + + for (int i = 0; i < numOutput; i++) + { + bias_scs[i] = outputMultiplier[i] * output_sc; + weight_scs[i] = bias_scs[i] / input_sc; + } + + weight_zps.assign(numOutput, 0); + bias_zps.assign(numOutput, 0); + + bool tvSymmetric; + tvSymmetric = getQuantType(weight_scs, numOutput); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index = -1, weight_index = -1, bias_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // weight Tensor + auto tvConvWeightShape = shape(tvWeightMat); + Mat tvInputMat = inputWrapper->getMat(); + // calculate group value. + int group = tvInputMat.size[1] / tvWeightMat.size[1]; + + // TODO! It will be supported in future. + if (tvSymmetric && tvWeightMat.total() == tvConvWeightShape[0]) + return Ptr(); + // Reverse weight shape From OpenCV NCHW to TimVX WHCN. + std::reverse(tvConvWeightShape.begin(), tvConvWeightShape.end()); + + Ptr weightWrapper = Ptr(new TimVXBackendWrapper(tvWeightMat)); + Ptr weightQuant; + + if (tvSymmetric) + { + int wtChanneldim = tvWeightMat.dims - 1; + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, wtChanneldim, + weight_scs, weight_zps)); + } + else + { + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0)); + } + weightWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT, weightQuant); + + weight_index = tvGraph->addWrapper(weightWrapper); + inputsIndex.push_back(weight_index); + + // Bias Tensor + Ptr biasWrapper = Ptr(new TimVXBackendWrapper(tvBiasMat)); + Ptr biasQuant; + + if (tvSymmetric) + { + biasQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0, + bias_scs, bias_zps)); + } + else + { + biasQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0] * input_sc, 0)); + } + + biasWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT, biasQuant); + bias_index = tvGraph->addWrapper(biasWrapper); + inputsIndex.push_back(bias_index); + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + auto outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + // From OpenCV NCHW, to TimVX WHCN + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvConv; + + if (ksize == 2) // for conv2d + { + int multiplier = 0; + if(group == tvConvWeightShape[3] && group != 1) + multiplier = 1; + if (group == 1 || (group == tvConvWeightShape[3] && group != 1)) // Conv2D || DeConv2D + { + if (tvPadType == tim::vx::PadType::AUTO) { + tvConv = graph->CreateOperation( + tvConvWeightShape[3], tvPadType, + std::array({(uint32_t) kernel_size[1], (uint32_t) kernel_size[0]}), + std::array({(uint32_t) strides[1], (uint32_t) strides[0]}), + std::array({(uint32_t) dilations[1], (uint32_t) dilations[0]}), + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + multiplier); + } + else + { + tvConv = graph->CreateOperation( + tvPadType, + std::array({(uint32_t) strides[1], (uint32_t) strides[0]}), + std::array({(uint32_t) dilations[1], (uint32_t) dilations[0]}), + multiplier); + } + } + else + { + // GroupedConv2d + if (tvPadType == tim::vx::PadType::AUTO) + { + tvConv = graph->CreateOperation( + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + std::array({(uint32_t)dilations[1], (uint32_t)dilations[0]}), + group); + } + else + { + tvConv = graph->CreateOperation( + tvPadType, + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + std::array({(uint32_t)dilations[1], (uint32_t)dilations[0]}), + group); + } + } + } + else + { + // for Conv1d + if (group != 1) + CV_Error( CV_StsNotImplemented, " Grouped Conv1d or Depth-Wise Conv1d are not supported by " + "TimVX Backend. Please try OpenCV Backend."); + tvConv = graph->CreateOperation( + tvConvWeightShape[2], tvPadType, (uint32_t)kernel_size[0], + (uint32_t)strides[0],(uint32_t)dilations[0], + std::array({(uint32_t)pads_begin[0], (uint32_t)pads_end[0]})); + } + // Create TimVXBackendNode + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvConv, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class ParallelConv : public cv::ParallelLoopBody { public: diff --git a/modules/dnn/src/int8layers/elementwise_layers.cpp b/modules/dnn/src/int8layers/elementwise_layers.cpp index 75118b6bc1..f1b78f48fb 100644 --- a/modules/dnn/src/int8layers/elementwise_layers.cpp +++ b/modules/dnn/src/int8layers/elementwise_layers.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include #include @@ -16,14 +17,45 @@ namespace dnn class ActivationLayerInt8Impl CV_FINAL : public ActivationLayerInt8 { public: + int input_zp, output_zp; + float input_sc, output_sc; + float slope = 0.0f; + +#ifdef HAVE_TIMVX + tvActivationType tvActType; +#endif ActivationLayerInt8Impl(const LayerParams ¶ms) { setParamsFrom(params); activationLUT = !blobs.empty() ? blobs[0] : Mat(); + + input_zp = params.get("input_zeropoint"); + input_sc = params.get("input_scale"); + output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); + + if (params.has("slope")) + { + slope = params.get("slope"); + } + +#ifdef HAVE_TIMVX + tvActType = getTimVXActType(type); +#endif + } virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX) + { + // TODO!: Leaky ReLU will be supported in future. + if (tvActType == tvActReLU && slope != 0.f) + return false; + return tvActType != tvActNotSupported; + } +#endif return backendId == DNN_BACKEND_OPENCV; } @@ -106,6 +138,112 @@ public: } }; + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index, output_index; + CV_Assert(inputsWrapper.size() == 1); + + // input Tensor + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if(input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + + inputsIndex.push_back(input_index); + + // output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + Ptr outputTensor; + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvAct; + + switch(tvActType) { + case tvActReLU: + { + if (slope != 0.f) + tvAct = graph->CreateOperation(slope); + else + tvAct = graph->CreateOperation(); + break; + } + case tvActReLU6: + tvAct = graph->CreateOperation(); + break; + case tvActTanH: + tvAct = graph->CreateOperation(); + break; + case tvActSwish: + tvAct = graph->CreateOperation(); + break; + case tvActMish: + tvAct = graph->CreateOperation(); + break; + case tvActSigmoid: + tvAct = graph->CreateOperation(); + break; + case tvActELU: + tvAct = graph->CreateOperation(); + break; + default: + // TODO! check the default function. + tvAct = graph->CreateOperation(); + break; + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvAct, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/eltwise_layer.cpp b/modules/dnn/src/int8layers/eltwise_layer.cpp index a522bc9031..e0a8d4787c 100644 --- a/modules/dnn/src/int8layers/eltwise_layer.cpp +++ b/modules/dnn/src/int8layers/eltwise_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include namespace cv @@ -22,6 +23,10 @@ public: } op; std::vector coeffs; std::vector zeropoints; + std::vector scales; + + int output_zp; + float output_sc; enum OutputChannelsMode { @@ -84,6 +89,20 @@ public: } } + if (params.has("input_scales")) + { + DictValue sc = params.get("input_scales"); + int i, n = sc.size(); + scales.resize(n); + for (i = 0; i < n; i++) + { + scales[i] = sc.get(i); + } + } + + output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); + channelsModeInput = ELTWISE_CHANNNELS_SAME; if (params.has("output_channels_mode")) { @@ -116,6 +135,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + // For TimVX Backend, only ELTWISE_CHANNNELS_SAME was supported. + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + return channelsModeInput == ELTWISE_CHANNNELS_SAME; return backendId == DNN_BACKEND_OPENCV; } @@ -219,6 +241,134 @@ public: } } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + if (inputsWrapper.size() != 2) + return Ptr(); + + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + bool isSub = false; + // TODO: support variable coeffs. + if (op == SUM) + { + CV_Assert(coeffs.size() == scales.size()); + std::vector originalCoeffs; + + for (int i = 0; i < coeffs.size(); i++) + { + originalCoeffs.push_back(coeffs[i] * output_sc / scales[i]); + } + + float eps = std::numeric_limits::epsilon(); + if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) && + std::fabs(originalCoeffs[1] + 1.0f) <= eps * std::fabs(originalCoeffs[1] - 1.0f)) + { + // Sub, if coeffs = {1., -1.}, isSub = true. + isSub = true; + } + else if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) && + std::abs(originalCoeffs[1] - 1.0f) <= eps * std::abs(originalCoeffs[1] + 1.0f)) + { + // Sum, if coeff = {1., 1.}, isSub = false. + isSub = false; + } + else + { + return Ptr(); + } + } + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + CV_Assert(channelsModeInput == ELTWISE_CHANNNELS_SAME); + + // Input + Ptr inputWrapper; + + CV_Assert(!scales.empty() && !zeropoints.empty()); + + for (int i = 0; i(); + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scales[i], zeropoints[i])); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + + inputsIndex.push_back(input_index); + } + + // Output + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvEltwise; + + switch (op) { + case SUM: + if (isSub) + tvEltwise = graph->CreateOperation(); + else + tvEltwise = graph->CreateOperation(); + break; + case PROD: + tvEltwise = graph->CreateOperation(); + break; + case MAX: + tvEltwise = graph->CreateOperation(); + break; + default: + CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvEltwise, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class EltwiseInvoker : public ParallelLoopBody { EltwiseLayerInt8Impl& self; diff --git a/modules/dnn/src/int8layers/fully_connected_layer.cpp b/modules/dnn/src/int8layers/fully_connected_layer.cpp index 83da677a47..0887388b0b 100644 --- a/modules/dnn/src/int8layers/fully_connected_layer.cpp +++ b/modules/dnn/src/int8layers/fully_connected_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include @@ -19,7 +20,11 @@ public: FullyConnectedLayerInt8Impl(const LayerParams& params) { setParamsFrom(params); + + input_sc = params.get("input_scale"); + input_zp = params.get("input_zeropoint"); output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); axis = params.get("axis", 1); if (blobs.size() == 3) { @@ -71,11 +76,25 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + if (biasMat.empty()) + return true; + else + return false; + } + return backendId == DNN_BACKEND_OPENCV; } virtual bool setActivation(const Ptr& layer) CV_OVERRIDE { + // TODO! add activation in Fully connection. +#ifdef HAVE_TIMVX + if(preferableTarget == DNN_TARGET_NPU) + return false; +#endif + Ptr activ_int8 = layer.dynamicCast(); if (!activ_int8.empty()) { @@ -87,6 +106,120 @@ public: return false; } + + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + int numOutput = blobs[0].size[0]; + Mat weightMat = blobs[0]; + + std::vector inputsIndex; + std::vector outputsIndex; + + std::vector weight_scs, bias_scs; + std::vector weight_zps; + + bias_scs.resize(numOutput); + weight_scs.resize(numOutput); + + for (int i = 0; i < numOutput; i++) + { + bias_scs[i] = outputMultiplier.at(i) * output_sc; + weight_scs[i] = bias_scs[i] / input_sc; + } + + weight_zps.assign(numOutput, 0); + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index = -1, weight_index = -1, output_index = -1; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor() || input_index == -1) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // weight tensor + Ptr weightWrapper = Ptr(new TimVXBackendWrapper(weightMat)); + Ptr weightQuant; + + bool tvSymmetric; + tvSymmetric = getQuantType(weight_scs, numOutput); + + if (tvSymmetric) + { + // TODO! fix the following issue. + // TimVX does not support the SYMMETRIC PER CHANNEL MatMul. + return Ptr(); + } + else + { + weightQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0)); + } + weightWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT, weightQuant); + + weight_index = tvGraph->addWrapper(weightWrapper); + inputsIndex.push_back(weight_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvMatmul; + + tvMatmul = graph->CreateOperation(false, true); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvMatmul, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + class FullyConnected : public ParallelLoopBody { public: diff --git a/modules/dnn/src/int8layers/pooling_layer.cpp b/modules/dnn/src/int8layers/pooling_layer.cpp index 20a0486a46..98cf17c06c 100644 --- a/modules/dnn/src/int8layers/pooling_layer.cpp +++ b/modules/dnn/src/int8layers/pooling_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include "opencv2/core/hal/intrin.hpp" #include @@ -26,9 +27,12 @@ public: globalPooling = false; isGlobalPooling = std::vector(3, false); output_zp = params.get("zeropoints"); - input_zp = params.get("input_zeropoint", 0); + input_zp = params.get("input_zeropoint", output_zp); multiplier = params.get("multiplier", 1.f); + output_sc = params.get("scales"); + input_sc = multiplier * output_sc; + hasDynamicShapes = params.get("has_dynamic_shapes", false); shapesInitialized = !hasDynamicShapes; @@ -103,6 +107,24 @@ public: else return false; } + else if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + // Only pool 2d and pool 1d were supported. + if (kernel_size.size() == 3) + { + // fallback to CPU implementation. + preferableTarget = DNN_TARGET_CPU; + return false; + } + if (!avePoolPaddedArea) // TimVX does not support exclude padding. + return false; + if (globalPooling) // TODO support globalPooling in TimVX backend. + return false; + if (kernel_size.size() == 2) + return type == MAX || type == AVE; + return false; + } + return false; } @@ -116,6 +138,139 @@ public: return false; } + + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + tim::vx::PoolType tvPoolType; + tim::vx::RoundType tvRoundType; + size_t ksize = kernel_size.size(); + if (ksize != 2) + return Ptr(); + + // type Change from OpenCV to TimVX only MAX and AVG are supported. + switch (type) { + case MAX: { + tvPoolType = tim::vx::PoolType::MAX; + break; + } + case AVE:{ + tvPoolType = tim::vx::PoolType::AVG; + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented Pooling type in TimVX Backend."); + } + + // Padding Type + tim::vx::PadType tvPadType; + if (padMode.empty()) + { + tvPadType = tim::vx::PadType::AUTO; // TODO! check the padding type. + } + else if(padMode == "VALID") + { + tvPadType = tim::vx::PadType::VALID; + } + else if (padMode == "SAME") + { + tvPadType = tim::vx::PadType::SAME; + } + else + { + CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!"); + } + + if (ceilMode) + tvRoundType = tim::vx::RoundType::CEILING; + else + tvRoundType = tim::vx::RoundType::FLOOR; + + auto input = inputsWrapper[0]; + std::vector inputsIndex; + std::vector outputsIndex; + + // input Tensor + auto inputWrapper = inputsWrapper[0].dynamicCast(); + int input_index, output_index; + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // Output tensor + CV_Assert(outputsWrapper.size() == 1); + auto outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + std::shared_ptr tvPool; + + if (tvPadType == tim::vx::PadType::AUTO) + { + tvPool = graph->CreateOperation( tvPoolType, + std::array({(uint32_t) pads_begin[1], (uint32_t) pads_end[1], + (uint32_t) pads_begin[0], (uint32_t) pads_end[0]}), + std::array({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + tvRoundType); + } + else + { + tvPool = graph->CreateOperation( + tvPoolType, tvPadType, + std::array({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}), + std::array({(uint32_t)strides[1], (uint32_t)strides[0]}), + tvRoundType); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvPool, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/quantization_utils.cpp b/modules/dnn/src/int8layers/quantization_utils.cpp index d72487639e..6e2f0bb61c 100644 --- a/modules/dnn/src/int8layers/quantization_utils.cpp +++ b/modules/dnn/src/int8layers/quantization_utils.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" namespace cv { @@ -149,15 +150,21 @@ public: class RequantizeLayerImpl CV_FINAL : public RequantizeLayer { public: + bool isEltwise; RequantizeLayerImpl(const LayerParams& params) { scale = params.get("scale", 1.f); shift = params.get("shift", 0.f); + isEltwise = params.get("isEltwise", false); setParamsFrom(params); } virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX() && !isEltwise) + { + return true; + } return backendId == DNN_BACKEND_OPENCV; } @@ -178,6 +185,82 @@ public: outputs_arr.getMatVector(outputs); } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // preprocessing + // Check if data is 8-bit. + CV_Assert(inputsWrapper.size() == 1 && outputsWrapper.size() == 1); + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + + if (!inputWrapper->isTensor()) + { + return Ptr(); + } + + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + // Input + std::shared_ptr inputTensor = inputWrapper->getTensor(); + input_index = tvGraph->getTensorIndex(inputTensor); + if (input_index == -1) + return Ptr(); + + inputsIndex.push_back(input_index); + + Ptr inputQuant = inputWrapper->getTensorQuantization(); + + tim::vx::QuantType quanType = inputQuant->Type(); + CV_Assert(quanType == tim::vx::QuantType::ASYMMETRIC); + + std::vector scales = inputQuant->Scales(); + std::vector zeropoints = inputQuant->ZeroPoints(); + CV_Assert(!scales.empty() && !zeropoints.empty()); + int input_zp = int(zeropoints[0]); + float input_scale = scales[0]; + + float tmpOut_sc = input_scale/scale; + int tmpOut_zp = int(shift + scale * input_zp); + + // Output + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, tmpOut_sc, tmpOut_zp)); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvRequantize = graph->CreateOperation(); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvRequantize, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/int8layers/reduce_layer.cpp b/modules/dnn/src/int8layers/reduce_layer.cpp new file mode 100644 index 0000000000..935bdc0659 --- /dev/null +++ b/modules/dnn/src/int8layers/reduce_layer.cpp @@ -0,0 +1,213 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include +#include +#include + +namespace cv +{ +namespace dnn +{ + +class ReduceLayerInt8Impl CV_FINAL : public ReduceLayerInt8 +{ +public: + ReduceLayerInt8Impl(const LayerParams& params) + { + // Set reduce type + CV_Assert(params.has("reduce")); + String typeString = toLowerCase(params.get("reduce")); + if (typeString == "max") + reduceType = MAX; + else if (typeString == "min") + reduceType = MIN; + else + CV_Error(Error::StsBadArg, "Unknown reduce type \"" + typeString + "\""); + + // Set deleted dims + CV_Assert(params.has("deleted_dims")); + DictValue tempDims = params.get("deleted_dims"); + int i, n = tempDims.size(); + reduceDims.resize(n); + for (i = 0; i < n; i++) + { + reduceDims[i] = tempDims.get(i); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_OPENCV) + { + return true; + } + return false; + } + + // reduceType == MIN + struct ReduceOpMIN + { + int8_t apply(const int8_t* first, const int8_t* last) + { + return std::accumulate(first, last, *first, + [](int8_t a, int8_t b) + { + return std::min(a, b); + }); + } + }; + + // reduceType == MAX + struct ReduceOpMAX + { + int8_t apply(const int8_t* first, const int8_t* last) + { + return std::accumulate(first, last, *first, + [](int8_t a, int8_t b) + { + return std::max(a, b); + }); + } + }; + + template + class ReduceInvoker : public ParallelLoopBody + { + public: + const Mat* src; + Mat *dst; + std::vector reduceDims; + int nstripes; + int reduceType; + Ptr func; + + ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr()) {} + + static void run(const Mat& src, Mat& dst, std::vector reduceDims, int reduceType, int nstripes) + { + CV_Assert_N(src.isContinuous(), dst.isContinuous(), src.type() == CV_8S, src.type() == dst.type()); + + ReduceInvoker p; + + p.src = &src; + p.dst = &dst; + + p.reduceDims = reduceDims; + p.nstripes = nstripes; + p.reduceType = reduceType; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + size_t total = dst->total(); + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + + int8_t *dstData = (int8_t *)dst->data; + int8_t *srcData = (int8_t *)src->data; + + for (size_t ofs = stripeStart; ofs < stripeEnd;) + { + const int8_t* first = srcData + ofs * totalDeleted; + const int8_t* last = srcData + (ofs + 1) * totalDeleted; + + dstData[ofs] = func->apply(first, last); + ofs += 1; + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + CV_Assert(inputs.size() == 1); + const int nstripes = getNumThreads(); + + switch (reduceType) + { + case MIN: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case MAX: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented"); + break; + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() > 0); + CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size()); + + std::vector outShape; + if (inputs[0].size() == reduceDims.size()) + outShape.push_back(1); + else + { + for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++) + { + outShape.push_back(inputs[0][i]); + } + } + outputs.assign(1, outShape); + + return false; + } + + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return false; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + size_t totalDeleted = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + for (int i = 0; i < outputs.size(); i++) + { + flops += total(outputs[i])*(totalDeleted); + } + return flops; + } +private: + enum Type + { + MAX, + MIN + }; +}; + +Ptr ReduceLayerInt8::create(const LayerParams& params) +{ + return Ptr(new ReduceLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/softmax_layer.cpp b/modules/dnn/src/int8layers/softmax_layer.cpp index 7e3c82bc21..b2caf56fb0 100644 --- a/modules/dnn/src/int8layers/softmax_layer.cpp +++ b/modules/dnn/src/int8layers/softmax_layer.cpp @@ -4,6 +4,7 @@ #include "../precomp.hpp" #include "layers_common.hpp" +#include "../op_timvx.hpp" #include #include @@ -16,11 +17,17 @@ namespace dnn class SoftMaxLayerInt8Impl CV_FINAL : public SoftmaxLayerInt8 { public: + float input_sc; + int input_zp; SoftMaxLayerInt8Impl(const LayerParams& params) { axisRaw = params.get("axis", 1); logSoftMax = params.get("log_softmax", false); + + input_sc = params.get("input_scale"); + input_zp = params.get("input_zeropoint"); + output_sc = params.get("scales"); output_zp = params.get("zeropoints"); setParamsFrom(params); @@ -41,7 +48,8 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { - return backendId == DNN_BACKEND_OPENCV; + return backendId == DNN_BACKEND_OPENCV || + (backendId == DNN_BACKEND_TIMVX && haveTimVX()); } virtual bool tryFuse(Ptr& top) CV_OVERRIDE @@ -50,6 +58,102 @@ public: return !dequantize_layer.empty() && preferableTarget != DNN_TARGET_OPENCL_FP16; } + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index, output_index; + + // input Tensor + CV_Assert(inputsWrapper.size() == 1); + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + const Mat &src = inputWrapper->getMat(); + + // convert axis from OpenCV NCHW toTimVX WHCN. + int axis = normalize_axis(axisRaw, src.dims); + int tvAxis = src.dims - 1 - axis; + if(tvAxis < 0) + tvAxis = 0; // default value is 0. + + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + // output tensor + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + Mat dstMat = outputWrapper->getMat(); + Ptr outputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp)); + + Ptr outputTensor; + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + if (dstMat.type() == CV_32F) + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT); + else + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + if (dstMat.type() == CV_32F) + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT); + else + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvSoftmax; + + if (logSoftMax) + { + tvSoftmax = graph->CreateOperation(tvAxis); + + } + else + { + tvSoftmax = graph->CreateOperation(1.0f, tvAxis); + } + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvSoftmax, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); diff --git a/modules/dnn/src/layer.cpp b/modules/dnn/src/layer.cpp new file mode 100644 index 0000000000..0ed3488da6 --- /dev/null +++ b/modules/dnn/src/layer.cpp @@ -0,0 +1,257 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Layer::Layer() { preferableTarget = DNN_TARGET_CPU; } + +Layer::Layer(const LayerParams& params) + : blobs(params.blobs) + , name(params.name) + , type(params.type) +{ + preferableTarget = DNN_TARGET_CPU; +} + +void Layer::setParamsFrom(const LayerParams& params) +{ + blobs = params.blobs; + name = params.name; + type = params.type; +} + +int Layer::inputNameToIndex(String) +{ + return -1; +} + +int Layer::outputNameToIndex(const String&) +{ + return 0; +} + +bool Layer::supportBackend(int backendId) +{ + return backendId == DNN_BACKEND_OPENCV; +} + +Ptr Layer::initCUDA( + void*, + const std::vector>&, + const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initVkCom(const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initHalide(const std::vector>&) +{ + CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initNgraph(const std::vector>& inputs, const std::vector>& nodes) +{ + CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initWebnn(const std::vector>& inputs, const std::vector>& nodes) +{ + CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::initTimVX(void* timVxInfo, + const std::vector > & inputsWrapper, + const std::vector > & outputsWrapper, + bool isLast) +{ + CV_Error(Error::StsNotImplemented, "TimVX pipeline of " + type + + " layers is not defined."); + return Ptr(); +} + +Ptr Layer::tryAttach(const Ptr& node) +{ + return Ptr(); +} + +bool Layer::setActivation(const Ptr&) { return false; } +bool Layer::tryFuse(Ptr&) { return false; } +void Layer::getScaleShift(Mat& scale, Mat& shift) const +{ + scale = Mat(); + shift = Mat(); +} + +void Layer::getScaleZeropoint(float& scale, int& zeropoint) const +{ + scale = 1.f; + zeropoint = 0; +} + +void Layer::unsetAttached() +{ + setActivation(Ptr()); +} + +template +static void vecToPVec(const std::vector& v, std::vector& pv) +{ + pv.resize(v.size()); + for (size_t i = 0; i < v.size(); i++) + pv[i] = const_cast(&v[i]); +} + +void Layer::finalize(const std::vector& inputs, std::vector& outputs) +{ + CV_TRACE_FUNCTION(); + this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs); +} + +void Layer::finalize(const std::vector& input, std::vector& output) +{ + CV_UNUSED(input); + CV_UNUSED(output); +} + +void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) +{ + CV_TRACE_FUNCTION(); + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + std::vector inputsp; + vecToPVec(inputs, inputsp); + this->finalize(inputsp, outputs); +} + +std::vector Layer::finalize(const std::vector& inputs) +{ + CV_TRACE_FUNCTION(); + + std::vector outputs; + this->finalize(inputs, outputs); + return outputs; +} + +void Layer::forward(std::vector& input, std::vector& output, std::vector& internals) +{ + // We kept this method for compatibility. DNN calls it now only to support users' implementations. +} + +void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr); +} + +void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S) + { + std::vector inputs; + std::vector outputs; + std::vector internals; + + std::vector orig_inputs; + std::vector orig_outputs; + std::vector orig_internals; + + inputs_arr.getUMatVector(orig_inputs); + outputs_arr.getUMatVector(orig_outputs); + internals_arr.getUMatVector(orig_internals); + + inputs.resize(orig_inputs.size()); + for (size_t i = 0; i < orig_inputs.size(); i++) + convertFp16(orig_inputs[i], inputs[i]); + + outputs.resize(orig_outputs.size()); + for (size_t i = 0; i < orig_outputs.size(); i++) + outputs[i].create(shape(orig_outputs[i]), CV_32F); + + internals.resize(orig_internals.size()); + for (size_t i = 0; i < orig_internals.size(); i++) + internals[i].create(shape(orig_internals[i]), CV_32F); + + forward(inputs, outputs, internals); + + for (size_t i = 0; i < outputs.size(); i++) + convertFp16(outputs[i], orig_outputs[i]); + + // sync results back + outputs_arr.assign(orig_outputs); + internals_arr.assign(orig_internals); + return; + } + std::vector inpvec; + std::vector outputs; + std::vector internals; + + inputs_arr.getMatVector(inpvec); + outputs_arr.getMatVector(outputs); + internals_arr.getMatVector(internals); + + std::vector inputs(inpvec.size()); + for (int i = 0; i < inpvec.size(); i++) + inputs[i] = &inpvec[i]; + + this->forward(inputs, outputs, internals); + + // sync results back + outputs_arr.assign(outputs); + internals_arr.assign(internals); +} + +void Layer::run(const std::vector& inputs, std::vector& outputs, std::vector& internals) +{ + CV_TRACE_FUNCTION(); + + this->finalize(inputs, outputs); + this->forward(inputs, outputs, internals); +} + +bool Layer::tryQuantize(const std::vector>& scales, + const std::vector>& zeropoints, LayerParams& params) +{ + return false; +} + +Layer::~Layer() {} + +bool Layer::getMemoryShapes(const std::vector& inputs, + const int requiredOutputs, + std::vector& outputs, + std::vector& internals) const +{ + CV_Assert(inputs.size()); + outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]); + return false; +} + +bool Layer::updateMemoryShapes(const std::vector& inputs) +{ + return true; +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/layer_factory.cpp b/modules/dnn/src/layer_factory.cpp new file mode 100644 index 0000000000..5c80cd09ad --- /dev/null +++ b/modules/dnn/src/layer_factory.cpp @@ -0,0 +1,111 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + +#include // getLayerFactoryImpl + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +Mutex& getLayerFactoryMutex() +{ + static Mutex* volatile instance = NULL; + if (instance == NULL) + { + cv::AutoLock lock(getInitializationMutex()); + if (instance == NULL) + instance = new Mutex(); + } + return *instance; +} + +static LayerFactory_Impl& getLayerFactoryImpl_() +{ + static LayerFactory_Impl impl; + return impl; +} + +LayerFactory_Impl& getLayerFactoryImpl() +{ + static LayerFactory_Impl* volatile instance = NULL; + if (instance == NULL) + { + cv::AutoLock lock(getLayerFactoryMutex()); + if (instance == NULL) + { + instance = &getLayerFactoryImpl_(); + initializeLayerFactory(); + } + } + return *instance; +} + +void LayerFactory::registerLayer(const String& type, Constructor constructor) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); + + if (it != getLayerFactoryImpl().end()) + { + if (it->second.back() == constructor) + CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered"); + it->second.push_back(constructor); + } + getLayerFactoryImpl().insert(std::make_pair(type, std::vector(1, constructor))); +} + +void LayerFactory::unregisterLayer(const String& type) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + + LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type); + if (it != getLayerFactoryImpl().end()) + { + if (it->second.size() > 1) + it->second.pop_back(); + else + getLayerFactoryImpl().erase(it); + } +} + +bool LayerFactory::isLayerRegistered(const std::string& type) +{ + cv::AutoLock lock(getLayerFactoryMutex()); + auto& registeredLayers = getLayerFactoryImpl(); + return registeredLayers.find(type) != registeredLayers.end(); +} + +Ptr LayerFactory::createLayerInstance(const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + cv::AutoLock lock(getLayerFactoryMutex()); + LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type); + + if (it != getLayerFactoryImpl().end()) + { + CV_Assert(!it->second.empty()); + return it->second.back()(params); + } + else + { + return Ptr(); // NULL + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/layer_internals.hpp b/modules/dnn/src/layer_internals.hpp new file mode 100644 index 0000000000..9ded3543e1 --- /dev/null +++ b/modules/dnn/src/layer_internals.hpp @@ -0,0 +1,335 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ +#define __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN +inline namespace detail { + +struct LayerPin +{ + int lid; + int oid; + + LayerPin(int layerId = -1, int outputId = -1) + : lid(layerId) + , oid(outputId) + {} + + bool valid() const + { + return (lid >= 0 && oid >= 0); + } + + bool equal(const LayerPin& r) const + { + return (lid == r.lid && oid == r.oid); + } + + bool operator<(const LayerPin& r) const + { + return lid < r.lid || (lid == r.lid && oid < r.oid); + } + + bool operator==(const LayerPin& r) const + { + return lid == r.lid && oid == r.oid; + } +}; + +struct LayerData +{ + LayerData() + : id(-1) + , dtype(CV_32F) + , skip(false) + , flag(0) + {} + LayerData(int _id, const String& _name, const String& _type, const int& _dtype, LayerParams& _params) + : id(_id) + , name(_name) + , type(_type) + , dtype(_dtype) + , params(_params) + , skip(false) + , flag(0) + { + CV_TRACE_FUNCTION(); + + // add logging info + params.name = name; + params.type = type; + } + + int id; + String name; + String type; + int dtype; // Datatype of output blobs. + LayerParams params; + + std::vector inputBlobsId; + std::set inputLayersId; + std::set requiredOutputs; + std::vector consumers; + std::vector> outputBlobsWrappers; + std::vector> inputBlobsWrappers; + std::vector> internalBlobsWrappers; + +#ifdef HAVE_CUDA + /* output ids which must be transferred to the host in the background + * after the completion of the forward pass of the layer + */ + std::vector cudaD2HBackgroundTransfers; +#endif + + Ptr layerInstance; + std::vector outputBlobs; + std::vector inputBlobs; + std::vector internals; + // Computation nodes of implemented backends (except DEFAULT). + std::map> backendNodes; + // Flag for skip layer computation for specific backend. + bool skip; + + int flag; + + Ptr getLayerInstance() + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(type, "type", type.c_str()); + + if (layerInstance) + return layerInstance; + + layerInstance = LayerFactory::createLayerInstance(type, params); + if (!layerInstance) + { + CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\""); + } + + return layerInstance; + } +}; + + +// fake layer containing network input blobs +struct DataLayer : public Layer +{ + DataLayer() + : Layer() + { + skip = false; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + // FIXIT: add wrapper without exception suppression + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + + bool isFP16 = outputs_arr.depth() == CV_16S; + + std::vector outputs, internals; + outputs_arr.getMatVector(outputs); + internals_arr.getMatVector(internals); + + for (int i = 0; i < inputsData.size(); ++i) + { + double scale = scaleFactors[i]; + Scalar& mean = means[i]; + + CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4); + if (isFP16) + CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); + else + CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); + + bool singleMean = true; + for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j) + { + singleMean = mean[j] == mean[j - 1]; + } + + if (singleMean) + { + if (isFP16) + { + Mat input_f32; + inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale); + convertFp16(input_f32, outputs[i]); + } + else + { + inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); + } + } + else + { + for (int n = 0; n < inputsData[i].size[0]; ++n) + { + for (int c = 0; c < inputsData[i].size[1]; ++c) + { + Mat inp = getPlane(inputsData[i], n, c); + Mat out = getPlane(outputs[i], n, c); + if (isFP16) + { + Mat input_f32; + inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale); + convertFp16(input_f32, out); + } + else + { + inp.convertTo(out, CV_32F, scale, -mean[c] * scale); + } + } + } + } + } + } + +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + bool isFP16 = outputs_.depth() == CV_16S; + + std::vector outputs; + outputs_.getUMatVector(outputs); + + for (int i = 0; i < inputsData.size(); ++i) + { + Mat inputData = inputsData[i]; + + double scale = scaleFactors[i]; + Scalar& mean = means[i]; + + CV_Assert(mean == Scalar() || inputData.size[1] <= 4); + if (isFP16) + CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, ""); + else + CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, ""); + + bool singleMean = true; + for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j) + { + singleMean = mean[j] == mean[j - 1]; + } + + if (singleMean) + { + if (isFP16) + { + UMat input_i; + inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale); + convertFp16(input_i, outputs[i]); + } + else + { + inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale); + } + } + else + { + for (int n = 0; n < inputData.size[0]; ++n) + { + for (int c = 0; c < inputData.size[1]; ++c) + { + Mat inp = getPlane(inputData, n, c); + + std::vector plane(4, Range::all()); + plane[0] = Range(n, n + 1); + plane[1] = Range(c, c + 1); + UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size); + + if (isFP16) + { + UMat input_i; + inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale); + convertFp16(input_i, out); + } + else + { + inp.convertTo(out, CV_32F, scale, -mean[c] * scale); + } + } + } + } + } + return true; + } +#endif + + int outputNameToIndex(const String& tgtName) CV_OVERRIDE + { + int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin()); + return (idx < (int)outNames.size()) ? idx : -1; + } + + void setNames(const std::vector& names) + { + outNames.assign(names.begin(), names.end()); + shapes.clear(); + shapes.resize(outNames.size()); + } + + void setInputShape(const String& tgtName, const MatShape& shape) + { + std::vector::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName); + CV_Check(tgtName, it != outNames.end(), "Unknown input"); + int idx = (int)(it - outNames.begin()); + + CV_Assert(idx < (int)shapes.size()); + CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed"); + shapes[idx] = shape; + } + + bool getMemoryShapes(const std::vector& inputs, + const int requiredOutputs, + std::vector& outputs, + std::vector& internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == requiredOutputs); + outputs.assign(inputs.begin(), inputs.end()); + return false; + } + + virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector outputs; + outputs_arr.getMatVector(outputs); + + CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(), + inputsData.size() == outputs.size()); + skip = true; + for (int i = 0; skip && i < inputsData.size(); ++i) + { + if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar()) + skip = false; + } + } + + + std::vector outNames; + std::vector shapes; + // Preprocessing parameters for each network's input. + std::vector scaleFactors; + std::vector means; + std::vector inputsData; + bool skip; +}; // DataLayer + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__ diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index bb81f14425..377e05f5cc 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -409,6 +409,7 @@ public: { params.set("input_scale", scales[0][0]); params.set("input_zeropoint", zeropoints[0][0]); + params.set("eps", epsilon); params.blobs.clear(); params.blobs.push_back(origin_weights); diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 675546f76f..5ba0cd199b 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -48,6 +48,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #ifdef HAVE_OPENCL #include "opencl_kernels_dnn.hpp" @@ -72,6 +73,9 @@ public: axis = params.get("axis", 1); padding = params.get("padding", false); paddingValue = params.get("padding_value", 0); + + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); } virtual bool getMemoryShapes(const std::vector &inputs, @@ -113,6 +117,21 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX && haveTimVX() && !padding) + { + if (axis == -1) + return false; + int len = this->type.length(); + if (len <= 4) + return false; + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } +#endif + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; @@ -393,6 +412,86 @@ public: } #endif // HAVE_DNN_NGRAPH +#ifdef HAVE_TIMVX + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + // convert axis from OpenCV NCHW toTimVX WHCN. + Mat blob0 = inputWrapper->getMat(); + + // TODO! support TimVX 5 dim in future. + if(blob0.dims >4) + return Ptr(); + + int cAxis = normalize_axis(axis, blob0.dims); + int tvAxis = blob0.dims - 1 - cAxis; + CV_Assert(tvAxis>= 0); + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + // Input + Ptr tvQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + + for (int i = 0; i(); + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + } + + //Output + CV_Assert(outputsWrapper.size() == 1); + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, tvQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, tvQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::shared_ptr tvConcate = graph->CreateOperation(tvAxis, inputsWrapper.size()); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvConcate, inputsIndex, outputsIndex); + + return tvBackendNode; + } +#endif // HAVE_TIMVX + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { @@ -416,6 +515,8 @@ public: } #endif + int zeropoint; + float scale; }; Ptr ConcatLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 1af34472df..0bf39f93b3 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -2080,6 +2080,7 @@ public: { auto context = reinterpret_cast(context_); + // TODO: extract bias from inputs and pass it CV_Assert(inputs.size() == 1 || inputs.size() == 2); auto input_wrapper = inputs[0].dynamicCast(); auto input_shape = input_wrapper->getShape(); @@ -2168,6 +2169,7 @@ public: float inputScale = scales[0][0], outputScale = scales[1][0]; int inputZp = zeropoints[0][0]; params.set("input_zeropoint", inputZp); + params.set("input_scale", inputScale); Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S); Mat biasQuantized(1, numOutput, CV_32S); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 0085ba7449..353ce8c0b4 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -496,6 +496,9 @@ struct ReLUFunctor : public BaseFunctor params.blobs.clear(); params.blobs.push_back(lookUpTable); } + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); + params.set("slope", slope); return true; } @@ -635,6 +638,8 @@ struct ReLU6Functor : public BaseFunctor bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) { + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); return true; } @@ -704,6 +709,8 @@ struct BaseDefaultFunctor : public BaseFunctor } params.blobs.clear(); params.blobs.push_back(lookUpTable); + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); return true; } @@ -2263,6 +2270,96 @@ struct ChannelsPReLUFunctor : public BaseFunctor int64 getFLOPSPerElement() const { return 1; } }; +struct SignFunctor : public BaseDefaultFunctor +{ + typedef SignLayer Layer; + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const SignFunctor::BaseDefaultFunctor::ocl_kernel_name = "SignForward"; + + +struct ShrinkFunctor : public BaseDefaultFunctor +{ + typedef ShrinkLayer Layer; + float bias; + float lambd; + + explicit ShrinkFunctor(float bias_ = 0.0f, float lambd_ = 0.5f) : bias(bias_), lambd(lambd_) {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f); + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, bias, lambd); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const ShrinkFunctor::BaseDefaultFunctor::ocl_kernel_name = "ShrinkForward"; + +struct ReciprocalFunctor : public BaseDefaultFunctor +{ + typedef ReciprocalLayer Layer; + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV || + backendId == DNN_BACKEND_CUDA; + } + + inline float calculate(float x) const + { + return 1.f/x; + } + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream); + } +#endif + + int64 getFLOPSPerElement() const { return 1; } +}; + +template<> +const char* const ReciprocalFunctor::BaseDefaultFunctor::ocl_kernel_name = "ReciprocalForward"; + + #define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \ Ptr<_Layer> _Layer::create() { \ return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); } @@ -2604,5 +2701,32 @@ Ptr ChannelsPReLULayer::create(const LayerParams& params) return l; } +Ptr SignLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr ReciprocalLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer()); + l->setParamsFrom(params); + + return l; +} + +Ptr ShrinkLayer::create(const LayerParams& params) +{ + float bias = params.get("bias", 0.f); + float lambd = params.get("lambd", 0.5f); + Ptr l(new ElementWiseLayer(ShrinkFunctor(bias, lambd))); + l->setParamsFrom(params); + l->bias = bias; + l->lambd = lambd; + + return l; +} } } diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 43d925055c..a67b0c4bb5 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -875,6 +875,8 @@ public: virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { + params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); if (op == SUM) { std::vector newCoeffs; @@ -897,7 +899,6 @@ public: newCoeffs[0] /= scales[1][0]; params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size())); params.set("offset", zeropoints[1][0]); - params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); return true; } return op == MAX; diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 5d0ad5fde7..e9632e20be 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -642,6 +642,8 @@ public: params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0]))); params.blobs.push_back(biasQuantized); params.blobs.push_back(outputMultiplier); + params.set("input_scale", inputScale); + params.set("input_zeropoint", inputZp); return true; } diff --git a/modules/dnn/src/layers/not_implemented_layer.cpp b/modules/dnn/src/layers/not_implemented_layer.cpp index 3fd52c09d9..f661155219 100644 --- a/modules/dnn/src/layers/not_implemented_layer.cpp +++ b/modules/dnn/src/layers/not_implemented_layer.cpp @@ -8,7 +8,7 @@ namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN -namespace detail { +inline namespace detail { class NotImplementedImpl CV_FINAL : public NotImplemented { diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 033b3d9aee..cce36b951f 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -47,6 +47,7 @@ #include "../ie_ngraph.hpp" #include "../op_vkcom.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #include #include @@ -108,6 +109,9 @@ public: _order.push_back(currentOrder); } + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); + setParamsFrom(params); checkNeedForPermutation(); } @@ -122,6 +126,20 @@ public: return true; } #endif + +#ifdef HAVE_TIMVX + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + int len = this->type.length(); + if (len <= 4) + return false; + + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_WEBNN || @@ -471,12 +489,120 @@ public: } #endif // HAVE_VULKAN +#ifdef HAVE_TIMVX + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + if (outputsWrapper.size() != 1) // only work for single outputBlob + return Ptr(); + + // Input + Ptr inputWrapper = inputsWrapper[0].dynamicCast(); + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor()) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + //Output + Ptr outputWrapper = outputsWrapper[0].dynamicCast(); + // output has the same quantized attrib. + Ptr outputQuant = inputWrapper->getTensorQuantization(); + + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + std::vector tvOrder; + if (getOrderWHCN(tvOrder)) + { + std::shared_ptr tvPermute = graph->CreateOperation(tvOrder); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvPermute, inputsIndex, outputsIndex); + + return tvBackendNode; + } + else + { + return Ptr(); + } + } +#endif // HAVE_TIMVX + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { return true; } + // convert OpenCV NCHW order to WHCN order. + bool getOrderWHCN(std::vector& orderWHCN) + { + std::map lookup; + int orderLen = _order.size(); + if (orderLen <2) + return false; + orderWHCN.assign(_order.begin(), _order.end()); + + if (orderLen == 2) + { + return true; + } + else if (orderLen >= 3) + { + for (int i = 0; i < orderLen; i++) + { + lookup[i] = orderLen - i - 1; + } + + for (int i = 0; i < orderLen; i++) + { + orderWHCN[i] = lookup[_order[i]]; + } + std::reverse(orderWHCN.begin(), orderWHCN.end()); + return true; + } + else + return false; + } + size_t _count; std::vector _order; @@ -492,6 +618,8 @@ public: #endif size_t _numAxes; + int zeropoint; + float scale; }; Ptr PermuteLayer::create(const LayerParams ¶ms) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index f8616a4184..6c584bf2dd 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -272,6 +272,17 @@ public: return true; } } + else if (backendId == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + if (kernel_size.size() == 3) + { + // fallback to CPU implementation. + preferableTarget = DNN_TARGET_CPU; + } +#endif + return false; + } return false; } diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index dbb3e2700a..3961051c8e 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -42,10 +42,14 @@ #include "../precomp.hpp" #include -#include #include #include +#ifdef HAVE_CUDA +#include "../cuda4dnn/primitives/recurrent_cells.hpp" +using namespace cv::dnn::cuda4dnn; +#endif + #include "layers_common.hpp" namespace cv @@ -103,7 +107,7 @@ static ActivationFunction get_activation_function(const String& activation) { class LSTMLayerImpl CV_FINAL : public LSTMLayer { - int numTimeStamps, numSamples; + int numTimeStamps, numSamples, numHidden; bool allocated; MatShape outTailShape; //shape of single output sample @@ -119,6 +123,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer ActivationFunction f_activation; ActivationFunction g_activation; ActivationFunction h_activation; + bool isDefaultActivations{true}; #if CV_TRY_AVX bool useAVX; @@ -127,6 +132,10 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer bool useAVX2; #endif + // CUDA needs input blobs to be rearranged in a specific way, but some transformations + // in ONNXImporter are destructive, so we keep a copy. + std::vector originalBlobs; + public: LSTMLayerImpl(const LayerParams& params) @@ -140,6 +149,13 @@ public: { setParamsFrom(params); + if (params.get("is_onnx", false)) + { + // collect copies of onnx blobs + originalBlobs.insert(originalBlobs.begin(), blobs.begin(), blobs.begin() + 3); + blobs.erase(blobs.begin(), blobs.begin() + 3); + } + bidirectional = params.get("bidirectional", false); if (!blobs.empty()) { @@ -181,6 +197,7 @@ public: useCellClip = params.get("use_cell_clip", false); usePeephole = params.get("use_peephole", false); reverse = params.get("reverse", false); + numHidden = params.get("hidden_size", 1); CV_Assert(!reverse || !bidirectional); // read activations @@ -190,11 +207,15 @@ public: f_activation = sigmoid; g_activation = tanh; h_activation = tanh; + isDefaultActivations = true; } else { CV_Assert(activations.size() == 3); f_activation = get_activation_function(activations.getStringValue(0)); g_activation = get_activation_function(activations.getStringValue(1)); h_activation = get_activation_function(activations.getStringValue(2)); + isDefaultActivations = activations.getStringValue(0) == "Sigmoid" + && activations.getStringValue(1) == "Tanh" + && activations.getStringValue(2) == "Tanh"; } allocated = false; @@ -233,6 +254,12 @@ public: blobs[2] = Mat(bias.clone()).reshape(1, 1); } + bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV + || (backendId == DNN_BACKEND_CUDA && isDefaultActivations && !reverse && !usePeephole); + } + bool getMemoryShapes(const std::vector &inputs, const int requiredOutputs, std::vector &outputs, @@ -269,8 +296,21 @@ public: outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end()); outResShape.back() *= (1 + static_cast(bidirectional)); - size_t noutputs = produceCellOutput ? 2 : 1; - outputs.assign(noutputs, outResShape); + outputs.assign(1, outResShape); + if (produceCellOutput) + { + // the producer is ONNX, so CellState is different + if (!originalBlobs.empty()) + { + int shp[] = {(1 + static_cast(bidirectional)), _numSamples, numHidden}; + MatShape newShape(shp, shp + sizeof(shp)/sizeof(shp[0])); + outputs.push_back(newShape); + } + else + { + outputs.push_back(outResShape); + } + } internals.assign(1, shape(_numSamples, _numOut)); // hInternal internals.push_back(shape(_numSamples, _numOut)); // cInternal @@ -335,14 +375,39 @@ public: outputs_arr.getMatVector(output); internals_arr.getMatVector(internals); + Mat cOut = produceCellOutput ? output[0].clone() : Mat(); + const bool needYcTransform = !originalBlobs.empty(); // if the producer is onnx const int numDirs = 1 + static_cast(bidirectional); for (int i = 0; i < numDirs; ++i) { - const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs); - const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs); - const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs); - const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs); - const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs); + Mat Wh = blobs[0]; + Mat Wx = blobs[1]; + Mat bias = blobs[2]; + Mat h_0 = blobs[3]; + Mat c_0 = blobs[4]; + Mat pI, pF, pO; + + Wh = Wh.rowRange(i * Wh.rows / numDirs, (i + 1) * Wh.rows / numDirs); + Wx = Wx.rowRange(i * Wx.rows / numDirs, (i + 1) * Wx.rows / numDirs); + bias = bias.colRange(i * bias.cols / numDirs, (i + 1) * bias.cols / numDirs); + h_0 = h_0.rowRange(i * h_0.rows / numDirs, (i + 1) * h_0.rows / numDirs); + c_0 = c_0.rowRange(i * c_0.rows / numDirs, (i + 1) * c_0.rows / numDirs); + + if (usePeephole) + { + pI = blobs[5]; + pF = blobs[6]; + pO = blobs[7]; + + pI = pI.rowRange(i * pI.rows / numDirs, (i + 1) * pI.rows / numDirs); + pI = pI.colRange(i * pI.cols / numDirs, (i + 1) * pI.cols / numDirs); + + pF = pF.rowRange(i * pF.rows / numDirs, (i + 1) * pF.rows / numDirs); + pF = pF.colRange(i * pF.cols / numDirs, (i + 1) * pF.cols / numDirs); + + pO = pO.rowRange(i * pO.rows / numDirs, (i + 1) * pO.rows / numDirs); + pO = pO.colRange(i * pO.cols / numDirs, (i + 1) * pO.cols / numDirs); + } int numOut = Wh.size[1]; Mat hInternal = internals[0], cInternal = internals[1], @@ -356,7 +421,12 @@ public: Mat hOutTs = output[0].reshape(1, numSamplesTotal); hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs); - Mat cOutTs = produceCellOutput ? output[1].reshape(1, numSamplesTotal) : Mat(); + Mat cOutTs; + if (produceCellOutput) + { + cOutTs = cOut.reshape(1, numSamplesTotal); + cOutTs = cOutTs.colRange(i * cOutTs.cols / numDirs, (i + 1) * cOutTs.cols / numDirs); + } #if CV_TRY_AVX2 || CV_TRY_AVX bool canUseAvx = gates.isContinuous() && bias.isContinuous() @@ -471,8 +541,8 @@ public: if (usePeephole) { Mat gatesIF = gates.colRange(0, 2*numOut); - gemm(cInternal, blobs[5], 1, gateI, 1, gateI); - gemm(cInternal, blobs[6], 1, gateF, 1, gateF); + gemm(cInternal, pI, 1, gateI, 1, gateI); + gemm(cInternal, pF, 1, gateF, 1, gateF); f_activation(gatesIF, gatesIF); } else @@ -495,7 +565,7 @@ public: } if (usePeephole) { - gemm(cInternal, blobs[7], 1, gateO, 1, gateO); + gemm(cInternal, pO, 1, gateO, 1, gateO); f_activation(gateO, gateO); } @@ -509,7 +579,129 @@ public: cInternal.copyTo(cOutTs.rowRange(curRowRange)); } } + + if (needYcTransform && produceCellOutput) + { + fixCellState(cOut, numDirs); + } + if (produceCellOutput) + { + cOut.copyTo(output[1]); + } } + + void fixCellState(Mat& cOut, int numDirs) + { + // seq, batch, dirs, hidden + int shp[] = {0, numSamples, numDirs, numHidden}; + cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + + // permute to {0, 2, 1, 3}; + cv::Mat newCellState; + cv::transposeND(cOut, {0, 2, 1, 3}, newCellState); + cOut = newCellState; + + if (numDirs == 1) + { + // Slice: Yh = Y[-1, :, :, :] + Range ranges[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range::all(), cv::Range::all(), cv::Range::all()}; + cOut = cOut(ranges); + // Reshape: 1x1xBxH -> 1xBxH + int shp[] = {1, numSamples, numHidden}; + cOut = cOut.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + } + else + { + // Slice: SxDxBxH -> last sequence, first direction + Range ranges1[] = {cv::Range(cOut.size[0] - 1, cOut.size[0]), cv::Range(0, 1), cv::Range::all(), cv::Range::all()}; + Mat part1 = cOut(ranges1); + + // Slice: SxDxBxH -> first sequence, last direction + Range ranges2[] = {cv::Range(0, 1), cv::Range(cOut.size[1] - 1, cOut.size[1]), cv::Range::all(), cv::Range::all()}; + Mat part2 = cOut(ranges2); + + int shp[] = {1, part1.size[2] * part1.size[3]}; + part1 = part1.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + part2 = part2.reshape(1, sizeof(shp)/sizeof(shp[0]), shp); + + vconcat(part1, part2, cOut); + + // Reshape: 1x2xBxH -> 2xBxH + int finalShape[] = {2, numSamples, numHidden}; + cOut = cOut.reshape(1, sizeof(finalShape)/sizeof(finalShape[0]), finalShape); + } + } + +#ifdef HAVE_CUDA + Ptr initCUDA(void *context_, const std::vector> &inputs, + const std::vector> &outputs) override + { + const int numDirs = 1 + static_cast(bidirectional); + auto toIFCO = [numDirs] (Mat& in) { + int first = in.size[0]; + int rest = in.total() / first / 4; + // every weight blob contains weights for Input, Output, Forget and Cell gates + Mat m = in.reshape(1, {first, 4, rest}); + Mat outputGate = m.col(1); + Mat forgetGate = m.col(2); + Mat cellGate = m.col(3); + // IOFC -> IFOC + std::swap_ranges(outputGate.begin(), outputGate.end(), forgetGate.begin()); + std::swap(outputGate, forgetGate); + // IFOC -> IFCO + std::swap_ranges(outputGate.begin(), outputGate.end(), cellGate.begin()); + in = in.reshape(1, numDirs); + }; + + Mat& b = originalBlobs[2]; + // B is a concatenation of biases for Wh and Wx + b = b.reshape(1, originalBlobs[2].size[0]*2); + + for (auto& m : originalBlobs) + { + toIFCO(m); + } + + b = b.reshape(1, static_cast(b.total())); + + Mat ordered_weights; + // Wx_f, Wh_f, [Wx_b, Wh_b,] b + for (int i = 0; i < numDirs; ++i) + { + for (size_t j = 0; j < 2; ++j) // Wx, Wh + { + Mat oneDirection = originalBlobs[j].row(i); + ordered_weights.push_back(oneDirection.reshape(1, static_cast(oneDirection.total()))); + } + } + ordered_weights.push_back(b); + + // Pass hidden states as is + Mat h0 = blobs[3]; + Mat c0 = blobs[4]; + + CV_Assert(!inputs.empty()); + auto input_wrapper = inputs[0].dynamicCast(); + auto input_shape = input_wrapper->getShape(); + + RNNConfiguration config + { + input_shape[0], // seqLength; + 1, // numLayers; + numHidden, // hiddenSize; + input_shape[2], // inputSize; + input_shape[1], // miniBatch; + bidirectional + }; + + + auto *context = reinterpret_cast(context_); + return make_cuda_node(preferableTarget, std::move(context->stream), + std::move(context->cudnn_handle), + ordered_weights, h0, c0, + config); + } +#endif }; Ptr LSTMLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/reduce_layer.cpp b/modules/dnn/src/layers/reduce_layer.cpp new file mode 100644 index 0000000000..47aec237c7 --- /dev/null +++ b/modules/dnn/src/layers/reduce_layer.cpp @@ -0,0 +1,389 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "../op_cuda.hpp" +#include "../op_webnn.hpp" + +#include +#include +#include +using std::max; +using std::min; + +#include + +namespace cv +{ +namespace dnn +{ + +class ReduceLayerImpl CV_FINAL : public ReduceLayer +{ +public: + ReduceLayerImpl(const LayerParams& params) + { + setParamsFrom(params); + // set reduce type + CV_Assert(params.has("reduce")); + String typeString = toLowerCase(params.get("reduce")); + if (typeString == "max") + reduceType= MAX; + else if (typeString == "min") + reduceType= MIN; + else if (typeString == "ave") + reduceType= AVE; + else if (typeString == "sum") + reduceType= SUM; + else if (typeString == "sum_square") + reduceType= SUM_SQUARE; + else if (typeString == "l1") + reduceType= L1; + else if (typeString == "l2") + reduceType= L2; + else if (typeString == "log_sum") + reduceType= LOG_SUM; + else if (typeString == "log_sum_exp") + reduceType= LOG_SUM_EXP; + else if (typeString == "prod") + reduceType= PROD; + else + CV_Error(Error::StsBadArg, "Unknown reduce type\"" + typeString + "\""); + + // set deleted dims + CV_Assert(params.has("deleted_dims")); + DictValue tempDims = params.get("deleted_dims"); + int i, n = tempDims.size(); + reduceDims.resize(n); + for (i = 0; i < n; i++) + { + reduceDims[i] = tempDims.get(i); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_OPENCV) + { + return true; + } + return false; + } + + // reduceType == MIN + struct ReduceOpMIN + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, FLT_MAX, + [](float a, float b) + { + return std::min(a, b); + }); + } + }; + + // reduceType == MAX + struct ReduceOpMAX + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, -FLT_MAX, + [](float a, float b) + { + return std::max(a, b); + }); + } + }; + + // reduceType == SUM + struct ReduceOpSUM + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f); + } + }; + + // reduceType == AVE + struct ReduceOpAVE + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.f); + return output * ikarea; + } + }; + + // reduceType == SUM_SQUARE + struct ReduceOpSUM_SQUARE + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + b * b; + }); + } + }; + + // reduceType == L1 + struct ReduceOpL1 + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + std::abs(b); + }); + } + }; + + // reduceType == L2 + struct ReduceOpL2 + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.f, + [](float a, float b) + { + return a + b * b; + }); + return std::sqrt(output); + } + }; + + // reduceType == PROD + struct ReduceOpPROD + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + return std::accumulate(first, last, 1.0f, std::multiplies()); + } + }; + + // reduceType == LOG_SUM + struct ReduceOpLOG_SUM + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.0f); + return std::log(output); + } + }; + + // reduceType == LOG_SUM_EXP + struct ReduceOpLOG_SUM_EXP + { + float apply(const float* first, const float* last, const float ikarea = 1.0f) + { + float output = std::accumulate(first, last, 0.0f, + [](float a, float b) + { + return a + std::exp(b); + }); + return std::log(output); + } + }; + + template + class ReduceInvoker : public ParallelLoopBody + { + public: + const Mat* src; + Mat *dst; + std::vector reduceDims; + int nstripes; + int reduceType; + Ptr func; + + ReduceInvoker() : src(0), dst(0), nstripes(0), reduceType(MAX), func(makePtr()) {} + + static void run(const Mat& src, Mat& dst, std::vector reduceDims, int reduceType, int nstripes) + { + CV_Assert_N( src.isContinuous(), dst.isContinuous(), src.type() == CV_32F, src.type() == dst.type()); + + ReduceInvoker p; + + p.src = &src; + p.dst = &dst; + + p.reduceDims = reduceDims; + p.nstripes = nstripes; + p.reduceType = reduceType; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + size_t total = dst->total(); + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + + float *dstData = (float *)dst->data; + float *srcData = (float *)src->data; + + for (size_t ofs = stripeStart; ofs < stripeEnd;) + { + const float* first = srcData + ofs * stride_w; + const float* last = srcData + (ofs + 1) * stride_w; + + if (ofs < stripeEnd) + { + dstData[ofs] = func->apply(first, last, 1.0 / stride_w); + ofs += 1; + } + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (inputs_arr.depth() == CV_16S) + { + forward_fallback(inputs_arr, outputs_arr, internals_arr); + return; + } + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + CV_Assert(inputs.size() == 1 || (inputs.size() == 2 && reduceType== SUM)); + const int nstripes = getNumThreads(); + + switch (reduceType) + { + case MIN: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case MAX: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case AVE: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case SUM: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case L1: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case L2: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case SUM_SQUARE: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case PROD: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case LOG_SUM: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + case LOG_SUM_EXP: + { + ReduceInvoker::run(inputs[0], outputs[0], reduceDims, reduceType, nstripes); + break; + } + default: + CV_Error(Error::StsNotImplemented, "Not implemented"); + break; + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() > 0); + CV_Assert(reduceDims.size() != 0 && inputs[0].size() >= reduceDims.size()); + + std::vector outShape; + if (inputs[0].size() == reduceDims.size()) + outShape.push_back(1); + else + { + for (int i = 0; i < inputs[0].size() - reduceDims.size(); i++) + { + outShape.push_back(inputs[0][i]); + } + } + outputs.assign(1, outShape); + + return false; + } + + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (reduceType== MAX || reduceType== MIN) + { + return true; + } + return false; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + size_t stride_w = std::accumulate(reduceDims.begin(), reduceDims.end(), 1, std::multiplies()); + for (int i = 0; i < outputs.size(); i++) + { + flops += total(outputs[i])*(stride_w); + } + return flops; + } +private: + enum ReduceType + { + MAX, + MIN, + AVE, + SUM, + L1, + L2, + PROD, + SUM_SQUARE, + LOG_SUM, + LOG_SUM_EXP + }; +}; + +Ptr ReduceLayer::create(const LayerParams& params) +{ + return Ptr(new ReduceLayerImpl(params)); +} + +} +} diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index f62235dc20..290effd380 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -46,6 +46,7 @@ #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" #include "../op_webnn.hpp" +#include "../op_timvx.hpp" #include @@ -167,6 +168,9 @@ public: hasDynamicShapes = params.get("has_dynamic_shapes", false); shapesInitialized = !hasDynamicShapes; + zeropoint = params.get("zeropoints", 0); + scale = params.get("scales", 1.0f); + CV_Assert(numAxes >= -1); newShapeRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes); @@ -202,6 +206,18 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (backendId == DNN_BACKEND_TIMVX && haveTimVX()) + { + int len = this->type.length(); + if (len <= 4) + return false; + + if (this->type.substr(len - 4) == "Int8") + return true; + else + return false; + } + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) return true; @@ -348,6 +364,99 @@ public: } #endif + virtual Ptr initTimVX(void* timVXInfo_, + const std::vector > &inputsWrapper, + const std::vector > &outputsWrapper, + bool isLast) CV_OVERRIDE + { +#ifdef HAVE_TIMVX + // tvGraph Initialization. + auto timVxInfo = reinterpret_cast(timVXInfo_); + CV_Assert(timVxInfo); + Ptr tvGraph = timVxInfo->getGraph(); + CV_Assert(tvGraph); + Ptr graph = tvGraph->graph; + + std::vector inputsIndex, outputsIndex; + int input_index = -1, output_index = -1; + + int reshapeNum = 0; + Ptr tmpWrapper, inputWrapper, outputWrapper; + for (size_t i = 0; i < outputsWrapper.size(); i++) + { + tmpWrapper = inputsWrapper[i].dynamicCast(); + Mat srcBlob = tmpWrapper->getMat(); + + tmpWrapper = outputsWrapper[i].dynamicCast(); + Mat dstBlob = tmpWrapper->getMat(); + if (dstBlob.data != srcBlob.data) + { + reshapeNum++; + inputWrapper = inputsWrapper[i].dynamicCast(); + outputWrapper = outputsWrapper[i].dynamicCast(); + } + } + + // Only work for single reshape Mat + if (reshapeNum != 1) + { + return Ptr(); + } + + // Input + if (inputWrapper->isTensor()) + { + input_index = tvGraph->getTensorIndex(inputWrapper->getTensor()); + if (input_index == -1) + { + // Copy To New inputWrapper + Mat tmp = inputWrapper->getMat(); + inputWrapper = Ptr(new TimVXBackendWrapper(tmp)); + } + } + + if (!inputWrapper->isTensor() || input_index == -1) + { + Ptr tvInputQuant = Ptr( + new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scale, zeropoint)); + inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT,tvInputQuant); + input_index = tvGraph->addWrapper(inputWrapper); + } + inputsIndex.push_back(input_index); + + //Output + // Output Tensor has the same quantized attrib as Input Tesor. + Ptr outputQuant = inputWrapper->getTensorQuantization(); + if (isLast) + { + auto shapeType = getShapeTypeFromMat(outputWrapper->getMat()); + + // For Graph Output tensor, we need to set tensor shape before createTensor(). + outputWrapper->setTensorShape(shapeType); + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant); + } + else + { + outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant); + } + output_index = tvGraph->addWrapper(outputWrapper); + outputsIndex.push_back(output_index); + + // generate output shape. + MatShape outputShape = shape(outputWrapper->getMat()); + // reverse shape, from NCHW to WHCN + std::reverse(outputShape.begin(), outputShape.end()); + std::vector tvShape(outputShape.begin(), outputShape.end()); + + std::shared_ptr tvReshape = graph->CreateOperation(tvShape); + + Ptr tvBackendNode = new TimVXBackendNode(tvGraph, tvReshape, inputsIndex, outputsIndex); + + return tvBackendNode; +#endif // HAVE_TIMVX + return Ptr(); + } + virtual bool tryQuantize(const std::vector > &scales, const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE { @@ -360,6 +469,8 @@ private: std::vector inputIndices; // Which axes from input are needed to compute correct output shape bool hasDynamicShapes; bool shapesInitialized; + float scale; + int zeropoint; }; Ptr ReshapeLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 4370e566a5..aa44e4a5b9 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -64,12 +64,32 @@ namespace cv namespace dnn { -void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector >& sliceRanges) +Range normalizeRange(const Range& input_range, int n) { + Range range = input_range; + + range.start = std::min(std::max(range.start, -n), n - 1); + if (range.start < 0) + { + range.start += n; + } + + range.end = std::min(std::max(range.end, -n), n); + if (range.end < 0) + { + range.end += n; + } + + return range; +} + +std::vector > finalizeSliceRange(const MatShape& inpShape, int& axis, + const std::vector >& inputSliceRanges) +{ + std::vector > sliceRanges = inputSliceRanges; CV_Assert(inpShape.size() > 0); bool axisNeg = (axis < 0); axis = (axis + static_cast(inpShape.size())) % inpShape.size(); - int n = inpShape[axis]; for (size_t i = 0; i < sliceRanges.size(); ++i){ std::vector& ranges = sliceRanges[i]; @@ -77,16 +97,20 @@ void sliceRangesFromShape(const MatShape& inpShape, int& axis, std::vector= 0) + for (size_t j = 0; j < ranges.size(); ++j) { - continue; - } + int n = inpShape[j]; + if (n <= 0) + { + continue; + } - CV_Assert(n != 0); - range.start = (n + range.start) % n; + ranges[j] = normalizeRange(ranges[j], n); + } } + + return sliceRanges; } class SliceLayerImpl : public SliceLayer @@ -136,7 +160,7 @@ public: { int size = sizeOrEnd; CV_Assert(size == -1 || size > 0); // -1 value means range [start, axis_size). - sliceRanges[0][i].end = size > 0 ? (start + size) : -1; // We'll finalize a negative value later. + sliceRanges[0][i].end = size > 0 ? (start + size) : INT_MAX; // We'll finalize a negative value later. } else { @@ -186,8 +210,7 @@ public: MatShape inpShape = inputs[0]; int axis_rw = axis; - std::vector > sliceRanges_rw = sliceRanges; - sliceRangesFromShape(inpShape, axis_rw, sliceRanges_rw); + std::vector > sliceRanges_rw = finalizeSliceRange(inpShape, axis_rw, sliceRanges); if (!sliceRanges_rw.empty()) { @@ -198,7 +221,7 @@ public: for (int j = 0; j < sliceRanges_rw[i].size(); ++j) { if (shapesInitialized || inpShape[j] > 0) - outputs[i][j] = normalize_axis_range(sliceRanges_rw[i][j], inpShape[j]).size(); + outputs[i][j] = normalizeRange(sliceRanges_rw[i][j], inpShape[j]).size(); if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1)) outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j]; @@ -235,8 +258,7 @@ public: CV_Assert(inputs.size() == 1); const MatSize& inpShape = inputs[0].size; - sliceRangesFromShape(shape(inputs[0]), axis, sliceRanges); - finalSliceRanges = sliceRanges; + finalSliceRanges = finalizeSliceRange(shape(inputs[0]), axis, sliceRanges); if (sliceRanges.empty()) { @@ -266,7 +288,7 @@ public: // Clamp. for (int j = 0; j < finalSliceRanges[i].size(); ++j) { - finalSliceRanges[i][j] = normalize_axis_range(finalSliceRanges[i][j], inpShape[j]); + finalSliceRanges[i][j] = normalizeRange(finalSliceRanges[i][j], inpShape[j]); } } diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 790f181325..b10aef3453 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -390,6 +390,8 @@ public: } params.blobs.clear(); params.blobs.push_back(lookUpTable); + params.set("input_scale", inpScale); + params.set("input_zeropoint", zeropoints[0][0]); return true; } diff --git a/modules/dnn/src/legacy_backend.cpp b/modules/dnn/src/legacy_backend.cpp new file mode 100644 index 0000000000..fa9407aacd --- /dev/null +++ b/modules/dnn/src/legacy_backend.cpp @@ -0,0 +1,130 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "legacy_backend.hpp" + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" +#include "op_timvx.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +BackendNode::BackendNode(int backendId) + : backendId(backendId) +{} + +BackendNode::~BackendNode() {}; + +BackendWrapper::BackendWrapper(int backendId, int targetId) + : backendId(backendId) + , targetId(targetId) +{} + +BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m) +{ + CV_Error(Error::StsNotImplemented, + "Constructor of backend wrapper must be implemented"); +} + +BackendWrapper::BackendWrapper(const Ptr& base, const MatShape& shape) +{ + CV_Error(Error::StsNotImplemented, + "Constructor of backend wrapper must be implemented"); +} + +BackendWrapper::~BackendWrapper() {} + + + +inline namespace detail { + + +Ptr wrapMat(int backendId, int targetId, cv::Mat& m) +{ + if (backendId == DNN_BACKEND_OPENCV) + { + if (targetId == DNN_TARGET_CPU) + return Ptr(); +#ifdef HAVE_OPENCL + else if (IS_DNN_OPENCL_TARGET(targetId)) + return OpenCLBackendWrapper::create(m); +#endif + else + CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier"); + } + else if (backendId == DNN_BACKEND_HALIDE) + { + CV_Assert(haveHalide()); +#ifdef HAVE_HALIDE + return Ptr(new HalideBackendWrapper(targetId, m)); +#endif // HAVE_HALIDE + } + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; + } + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { +#ifdef HAVE_DNN_NGRAPH + return Ptr(new NgraphBackendWrapper(targetId, m)); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph"); +#endif + } + else if (backendId == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + return Ptr(new WebnnBackendWrapper(targetId, m)); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); +#endif + } + else if (backendId == DNN_BACKEND_VKCOM) + { + CV_Assert(haveVulkan()); +#ifdef HAVE_VULKAN + return Ptr(new VkComBackendWrapper(m)); +#endif // HAVE_VULKAN + } + else if (backendId == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); + +#ifdef HAVE_CUDA + switch (targetId) + { + case DNN_TARGET_CUDA: + return CUDABackendWrapperFP32::create(m); + case DNN_TARGET_CUDA_FP16: + return CUDABackendWrapperFP16::create(m); + default: + CV_Assert(IS_DNN_CUDA_TARGET(targetId)); + } +#endif + } + else if (backendId == DNN_BACKEND_TIMVX) + { + CV_Assert(haveTimVX()); +#ifdef HAVE_TIMVX + return Ptr(new TimVXBackendWrapper(m)); +#endif // HAVE_TIMVX + } + else + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + return Ptr(); // TODO Error? +} // wrapMat() + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/legacy_backend.hpp b/modules/dnn/src/legacy_backend.hpp new file mode 100644 index 0000000000..e9ca3fecb3 --- /dev/null +++ b/modules/dnn/src/legacy_backend.hpp @@ -0,0 +1,339 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ +#define __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ + +#include "layer_internals.hpp" // LayerPin LayerData DataLayer + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN +inline namespace detail { + + +#ifdef HAVE_OPENCL +class OpenCLBackendWrapper : public BackendWrapper +{ +public: + OpenCLBackendWrapper(Mat& m) + : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) + { + m.copyTo(umat); + host = &m; + hostDirty = false; + } + + OpenCLBackendWrapper(const Ptr& baseBuffer, Mat& m) + : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL) + { + Ptr base = baseBuffer.dynamicCast(); + CV_Assert(!base.empty()); + + host = &m; + + int shape[] = { 1, (int)base->umat.total() }; + umat = base->umat.reshape(1, 2, &shape[0]) + .colRange(0, host->total()) + .reshape(1, host->dims, &host->size[0]); + hostDirty = false; + } + + static Ptr create(Mat& m) + { + return Ptr(new OpenCLBackendWrapper(m)); + } + + static Ptr create(const Ptr& baseBuffer, Mat& m) + { + return Ptr(new OpenCLBackendWrapper(baseBuffer, m)); + } + + static std::vector getUMatVector(const std::vector>& wrappers) + { + const int numWrappers = wrappers.size(); + std::vector mats(wrappers.size()); + for (int i = 0; i < numWrappers; ++i) + { + Ptr umatWrapper = wrappers[i].dynamicCast(); + CV_Assert(!umatWrapper.empty()); + umatWrapper->copyToDevice(); + mats[i] = umatWrapper->umat; + } + return mats; + } + + // Replaces all umats in wrappers to specific ones. + static void update(const std::vector>& wrappers, + const std::vector& umats) + { + CV_Assert(wrappers.size() == umats.size()); + for (int i = 0, n = umats.size(); i < n; ++i) + { + Ptr umatWrapper = wrappers[i].dynamicCast(); + CV_Assert(!umatWrapper.empty()); + umatWrapper->umat = umats[i]; + } + } + + ~OpenCLBackendWrapper() {} + + // Copies data from device to a host memory. + virtual void copyToHost() CV_OVERRIDE + { + umat.copyTo(*host); + } + + virtual void setHostDirty() CV_OVERRIDE + { + hostDirty = true; + }; + + void copyToDevice() + { + if (hostDirty) + { + host->copyTo(umat); + hostDirty = false; + } + } + +private: + UMat umat; + Mat* host; + bool hostDirty; +}; // OpenCLBackendWrapper +#endif // HAVE_OPENCL + + +struct BlobManager +{ +public: + // Increase references counter to layer output. + void addReference(const LayerPin& lp) + { + std::map::iterator it = refCounter.find(lp); + if (it == refCounter.end()) + refCounter[lp] = 1; + else + it->second += 1; + } + + void addReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + addReference(pins[i]); + } + } + + // Returns number of references to allocated memory that used in specific + // layer blob. + int numReferences(const LayerPin& lp) + { + std::map::const_iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + LayerPin memHost = mapIt->second; + + std::map::const_iterator refIt = refCounter.find(memHost); + CV_Assert(refIt != refCounter.end()); + return refIt->second; + } + + // Reuse data allocated in inside the blob. + void reuse(const LayerPin& host, const LayerPin& user) + { + CV_Assert(reuseMap.find(user) == reuseMap.end()); + CV_Assert(reuseMap.find(host) != reuseMap.end()); + LayerPin memHost = reuseMap[host]; + reuseMap[user] = memHost; + if (refCounter.find(memHost) != refCounter.end()) + { + std::map::iterator userRefIt = refCounter.find(user); + if (userRefIt != refCounter.end()) + { + refCounter[memHost] += userRefIt->second; + refCounter.erase(userRefIt); + } + else + refCounter[memHost] += 1; + } + } + + // Decrease references counter to allocated memory inside specific blob. + void releaseReference(const LayerPin& lp) + { + std::map::const_iterator mapIt = reuseMap.find(lp); + CV_Assert(mapIt != reuseMap.end()); + + std::map::iterator refIt = refCounter.find(mapIt->second); + CV_Assert(refIt != refCounter.end()); + CV_Assert(refIt->second > 0); + refIt->second -= 1; + } + + void releaseReferences(const std::vector& pins) + { + for (int i = 0; i < pins.size(); i++) + { + releaseReference(pins[i]); + } + } + + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype) + { + if (!getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS()) + { + Mat bestBlob; + LayerPin bestBlobPin; + + std::map::const_iterator hostIt; + std::map::const_iterator refIt; + + const int targetTotal = total(shape); + int bestBlobTotal = INT_MAX; + + for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt) + { + refIt = refCounter.find(hostIt->first); + // Use only blobs that had references before because if not, + // it might be used as output. + if (refIt != refCounter.end() && refIt->second == 0) + { + const Mat& unusedBlob = hostIt->second; + if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal && unusedBlob.type() == dtype) + { + bestBlobPin = hostIt->first; + bestBlob = unusedBlob; + bestBlobTotal = unusedBlob.total(); + } + } + } + if (!bestBlob.empty()) + { + reuse(bestBlobPin, lp); + dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape); + return; + } + } + + { + // if dst already has been allocated with total(shape) elements, + // it won't be recreated and pointer of dst.data remains the same. + dst.create(shape, dtype); + addHost(lp, dst); + } + } + + void allocateBlobsForLayer(LayerData& ld, const LayerShapes& layerShapes, + std::vector& pinsForInternalBlobs) + { + CV_TRACE_FUNCTION(); + + pinsForInternalBlobs.clear(); + + std::vector&outputBlobs = ld.outputBlobs, + &internalBlobs = ld.internals; + + const ShapesVec &outShapes = layerShapes.out, + internalShapes = layerShapes.internal; + + outputBlobs.resize(std::max((size_t)1, outShapes.size())); // layer produce at least one output blob + internalBlobs.resize(internalShapes.size()); + + CV_Assert(ld.requiredOutputs.size() <= outShapes.size()); + + // Check that layer could work in-place. + bool inPlace = false; + if (layerShapes.supportInPlace) + { + if (ld.inputBlobs.size() == 1) + { + // Get number of references to the input memory. + int numRef = numReferences(ld.inputBlobsId[0]); + // If current layer is one and only customer of this blob. + inPlace = numRef == 1; + } + } + + ShapesVec shapes(outShapes); + shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end()); + std::vector blobs; + for (int i = 0; i < outputBlobs.size(); i++) + { + blobs.push_back(&outputBlobs[i]); + } + + for (int i = 0; i < internalBlobs.size(); i++) + { + blobs.push_back(&internalBlobs[i]); + if (total(internalShapes[i])) + { + pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i)); + } + } + + addReferences(pinsForInternalBlobs); + + std::map> idxSizes; + for (int i = 0; i < shapes.size(); i++) + { + idxSizes[total(shapes[i])].push_back(i); + } + + std::map>::reverse_iterator it; + for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++) + { + for (int j = 0; j < it->second.size(); j++) + { + int index = it->second[j]; + if (total(shapes[index])) + { + LayerPin blobPin(ld.id, index); + if (index < outShapes.size() && inPlace) + { + CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index])); + ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]); + reuse(ld.inputBlobsId[0], blobPin); + } + else + reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype); + } + } + } + } + + // Clear internal state. Calls before an every reallocation. + void reset() + { + CV_TRACE_FUNCTION(); + + refCounter.clear(); + reuseMap.clear(); + memHosts.clear(); + } + +private: + // Register allocated memory. + void addHost(const LayerPin& lp, const Mat& mat) + { + CV_Assert(memHosts.find(lp) == memHosts.end()); + reuseMap[lp] = lp; + memHosts[lp] = mat; + } + + std::map refCounter; + // Maps pin to origin blob (for whom memory was allocated firstly). + // For origin blobs key == value. + std::map reuseMap; + std::map memHosts; +}; // BlobManager + + +Ptr wrapMat(int backendId, int targetId, cv::Mat& m); + + +} // namespace detail +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__ diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index bc8709d22e..7444011a64 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -197,28 +197,95 @@ void Model::predict(InputArray frame, OutputArrayOfArrays outs) const } +class ClassificationModel_Impl : public Model::Impl +{ +public: + virtual ~ClassificationModel_Impl() {} + ClassificationModel_Impl() : Impl() {} + ClassificationModel_Impl(const ClassificationModel_Impl&) = delete; + ClassificationModel_Impl(ClassificationModel_Impl&&) = delete; + + void setEnableSoftmaxPostProcessing(bool enable) + { + applySoftmax = enable; + } + + bool getEnableSoftmaxPostProcessing() const + { + return applySoftmax; + } + + std::pair classify(InputArray frame) + { + std::vector outs; + processFrame(frame, outs); + CV_Assert(outs.size() == 1); + + Mat out = outs[0].reshape(1, 1); + + if(getEnableSoftmaxPostProcessing()) + { + softmax(out, out); + } + + double conf; + Point maxLoc; + cv::minMaxLoc(out, nullptr, &conf, nullptr, &maxLoc); + return {maxLoc.x, static_cast(conf)}; + } + +protected: + void softmax(InputArray inblob, OutputArray outblob) + { + const Mat input = inblob.getMat(); + outblob.create(inblob.size(), inblob.type()); + + Mat exp; + const float max = *std::max_element(input.begin(), input.end()); + cv::exp((input - max), exp); + outblob.getMat() = exp / cv::sum(exp)[0]; + } + +protected: + bool applySoftmax = false; +}; + +ClassificationModel::ClassificationModel() + : Model() +{ + // nothing +} + ClassificationModel::ClassificationModel(const String& model, const String& config) - : Model(model, config) + : ClassificationModel(readNet(model, config)) { // nothing } ClassificationModel::ClassificationModel(const Net& network) - : Model(network) + : Model() { - // nothing + impl = makePtr(); + impl->initNet(network); +} + +ClassificationModel& ClassificationModel::setEnableSoftmaxPostProcessing(bool enable) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + impl.dynamicCast()->setEnableSoftmaxPostProcessing(enable); + return *this; +} + +bool ClassificationModel::getEnableSoftmaxPostProcessing() const +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + return impl.dynamicCast()->getEnableSoftmaxPostProcessing(); } std::pair ClassificationModel::classify(InputArray frame) { - std::vector outs; - impl->processFrame(frame, outs); - CV_Assert(outs.size() == 1); - - double conf; - cv::Point maxLoc; - minMaxLoc(outs[0].reshape(1, 1), nullptr, &conf, nullptr, &maxLoc); - return {maxLoc.x, static_cast(conf)}; + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); + return impl.dynamicCast()->classify(frame); } void ClassificationModel::classify(InputArray frame, int& classId, float& conf) @@ -732,7 +799,7 @@ struct TextRecognitionModel_Impl : public Model::Impl virtual std::string ctcPrefixBeamSearchDecode(const Mat& prediction) { - // CTC prefix beam seach decode. + // CTC prefix beam search decode. // For more detail, refer to: // https://distill.pub/2017/ctc/#inference // https://gist.github.com/awni/56369a90d03953e370f3964c826ed4b0i @@ -1500,4 +1567,4 @@ int TextDetectionModel_DB::getMaxCandidates() const } -}} // namespace +}} // namespace diff --git a/modules/dnn/src/net.cpp b/modules/dnn/src/net.cpp new file mode 100644 index 0000000000..901101b1e0 --- /dev/null +++ b/modules/dnn/src/net.cpp @@ -0,0 +1,414 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +Net::Net() + : impl(makePtr()) +{ +} + +Net::~Net() +{ +} + +int Net::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->addLayer(name, type, dtype, params); +} + +int Net::addLayer(const String& name, const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + return addLayer(name, type, CV_32F, params); +} + +int Net::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->addLayerToPrev(name, type, dtype, params); +} + +int Net::addLayerToPrev(const String& name, const String& type, LayerParams& params) +{ + CV_TRACE_FUNCTION(); + return addLayerToPrev(name, type, CV_32F, params); +} + +void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + impl->connect(outLayerId, outNum, inpLayerId, inpNum); +} + +void Net::connect(String _outPin, String _inPin) +{ + CV_TRACE_FUNCTION(); + + CV_Assert(impl); + LayerPin outPin = impl->getPinByAlias(_outPin); + LayerPin inpPin = impl->getPinByAlias(_inPin); + + CV_Assert(outPin.valid() && inpPin.valid()); + + impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); +} + +int Net::registerOutput(const std::string& outputName, int layerId, int outputPort) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->registerOutput(outputName, layerId, outputPort); +} + +Mat Net::forward(const String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputName); +} + +AsyncArray Net::forwardAsync(const String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forwardAsync(outputName); +} + +void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outputName); +} + +void Net::forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outBlobNames); +} + +void Net::forward(std::vector>& outputBlobs, + const std::vector& outBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->forward(outputBlobs, outBlobNames); +} + +// FIXIT drop from inference API +Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->quantize(calibData, inputsDtype, outputsDtype); +} + +// FIXIT drop from inference API +void Net::getInputDetails(std::vector& scales, std::vector& zeropoints) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->getInputDetails(scales, zeropoints); +} + +// FIXIT drop from inference API +void Net::getOutputDetails(std::vector& scales, std::vector& zeropoints) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->getOutputDetails(scales, zeropoints); +} + +void Net::setPreferableBackend(int backendId) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG(backendId); + CV_Assert(impl); + return impl->setPreferableBackend(backendId); +} + +void Net::setPreferableTarget(int targetId) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG(targetId); + CV_Assert(impl); + return impl->setPreferableTarget(targetId); +} + +void Net::setInputsNames(const std::vector& inputBlobNames) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->setInputsNames(inputBlobNames); +} + +void Net::setInputShape(const String& inputName, const MatShape& shape) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->setInputShape(inputName, shape); +} + +void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + CV_Assert(impl); + return impl->setInput(blob, name, scalefactor, mean); +} + +Mat Net::getParam(int layer, int numParam) const +{ + CV_Assert(impl); + return impl->getParam(layer, numParam); +} + +void Net::setParam(int layer, int numParam, const Mat& blob) +{ + CV_Assert(impl); + return impl->setParam(layer, numParam, blob); +} + +int Net::getLayerId(const String& layer) const +{ + CV_Assert(impl); + return impl->getLayerId(layer); +} + +String Net::dump() +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + return impl->dump(true); +} + +void Net::dumpToFile(const String& path) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + CV_Assert(!empty()); + std::ofstream file(path.c_str()); + file << dump(); + file.close(); +} + +Ptr Net::getLayer(int layerId) const +{ + CV_Assert(impl); + return impl->getLayer(layerId); +} +Ptr Net::getLayer(const LayerId& layerId) const +{ + CV_Assert(impl); + return impl->getLayer(layerId); +} + +std::vector> Net::getLayerInputs(int layerId) const +{ + CV_Assert(impl); + return impl->getLayerInputs(layerId); +} + +std::vector Net::getLayerNames() const +{ + CV_Assert(impl); + return impl->getLayerNames(); +} + +bool Net::empty() const +{ + CV_Assert(impl); + return impl->empty(); +} + +// FIXIT drop "unconnected" API +std::vector Net::getUnconnectedOutLayers() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getUnconnectedOutLayers(); +} + +// FIXIT drop "unconnected" API +std::vector Net::getUnconnectedOutLayersNames() const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getUnconnectedOutLayersNames(); +} + +void Net::getLayersShapes(const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const +{ + CV_Assert(impl); + return impl->getLayersShapes(netInputShapes, layersIds, inLayersShapes, outLayersShapes); +} + +void Net::getLayersShapes(const MatShape& netInputShape, + std::vector& layerIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) const +{ + getLayersShapes(ShapesVec(1, netInputShape), + layerIds, inLayersShapes, outLayersShapes); +} + +void Net::getLayerShapes(const MatShape& netInputShape, + const int layerId, + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const +{ + getLayerShapes(ShapesVec(1, netInputShape), + layerId, inLayerShapes, outLayerShapes); +} + +void Net::getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + ShapesVec& inLayerShapes, + ShapesVec& outLayerShapes) const +{ + CV_Assert(impl); + LayerShapes shapes; + impl->getLayerShapes(netInputShapes, layerId, shapes); + inLayerShapes = shapes.in; + outLayerShapes = shapes.out; +} + +int64 Net::getFLOPS(const std::vector& netInputShapes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getFLOPS(netInputShapes); +} + +int64 Net::getFLOPS(const MatShape& netInputShape) const +{ + return getFLOPS(std::vector(1, netInputShape)); +} + +int64 Net::getFLOPS(const int layerId, + const std::vector& netInputShapes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getFLOPS(layerId, netInputShapes); +} + +int64 Net::getFLOPS(const int layerId, + const MatShape& netInputShape) const +{ + return getFLOPS(layerId, std::vector(1, netInputShape)); +} + +void Net::getLayerTypes(std::vector& layersTypes) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getLayerTypes(layersTypes); +} + +int Net::getLayersCount(const String& layerType) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getLayersCount(layerType); +} + +void Net::getMemoryConsumption(const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(layerId, netInputShapes, weights, blobs); +} + +void Net::getMemoryConsumption(const std::vector& netInputShapes, + size_t& weights, size_t& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(netInputShapes, weights, blobs); +} + +void Net::getMemoryConsumption(const int layerId, + const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + getMemoryConsumption(layerId, std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const MatShape& netInputShape, + size_t& weights, size_t& blobs) const +{ + getMemoryConsumption(std::vector(1, netInputShape), + weights, blobs); +} + +void Net::getMemoryConsumption(const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getMemoryConsumption(netInputShapes, layerIds, weights, blobs); +} + +void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector& layerIds, + std::vector& weights, std::vector& blobs) const +{ + getMemoryConsumption(std::vector(1, netInputShape), layerIds, + weights, blobs); +} + +// FIXIT return old value or add get method +void Net::enableFusion(bool fusion) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->enableFusion(fusion); +} + +void Net::setHalideScheduler(const String& scheduler) +{ + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str()); + CV_Assert(impl); + return impl->setHalideScheduler(scheduler); +} + +int64 Net::getPerfProfile(std::vector& timings) +{ + CV_TRACE_FUNCTION(); + CV_Assert(impl); + return impl->getPerfProfile(timings); +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp new file mode 100644 index 0000000000..24fb31f03e --- /dev/null +++ b/modules/dnn/src/net_impl.cpp @@ -0,0 +1,2107 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +static int g_networkId = 0; + + +detail::NetImplBase::NetImplBase() + : networkId(CV_XADD(&g_networkId, 1)) + , networkDumpCounter(0) + , dumpLevel(getParam_DNN_NETWORK_DUMP()) +{ + // nothing +} + + +std::string detail::NetImplBase::getDumpFileNameBase() const +{ + std::string dumpFileNameBase = cv::format("ocv_dnn_net_%05d_%02d", networkId, networkDumpCounter++); + return dumpFileNameBase; +} + + +Net::Impl::Impl() +{ + // allocate fake net input layer + netInputLayer = Ptr(new DataLayer()); + LayerData& inpl = layers.insert(make_pair(0, LayerData())).first->second; + inpl.id = 0; + netInputLayer->name = inpl.name = "_input"; + inpl.type = "__NetInputLayer__"; + inpl.layerInstance = netInputLayer; + layerNameToId.insert(std::make_pair(inpl.name, inpl.id)); + + lastLayerId = 0; + netWasAllocated = false; + netWasQuantized = false; + fusion = true; + isAsync = false; + preferableBackend = DNN_BACKEND_DEFAULT; + preferableTarget = DNN_TARGET_CPU; + skipInfEngineInit = false; + hasDynamicShapes = false; +} + + +bool Net::Impl::empty() const +{ + return layers.size() <= 1; // first layer is default Data layer +} + + +void Net::Impl::clear() +{ + CV_TRACE_FUNCTION(); + + MapIdToLayerData::iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + if (it->second.id != 0) + { + it->second.inputBlobs.clear(); + it->second.outputBlobs.clear(); + it->second.internals.clear(); + } + it->second.skip = false; + // it->second.consumers.clear(); + Ptr currLayer = it->second.layerInstance; + + if (currLayer.empty()) + continue; + + currLayer->unsetAttached(); + } + netWasAllocated = false; + layersTimings.clear(); +} + + +void Net::Impl::setUpNet(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + + if (dumpLevel && networkDumpCounter == 0) + { + dumpNetworkToFile(); + } + + if (preferableBackend == DNN_BACKEND_DEFAULT) + preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT(); +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE) + preferableBackend = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; // = getInferenceEngineBackendTypeParam(); +#endif + + CV_Assert(preferableBackend != DNN_BACKEND_OPENCV || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16); + CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || + preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL); +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + CV_Assert( + (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16 || + preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || + preferableTarget == DNN_TARGET_FPGA + ); + } +#endif +#ifdef HAVE_WEBNN + if (preferableBackend == DNN_BACKEND_WEBNN) + { + CV_Assert(preferableTarget == DNN_TARGET_CPU || + preferableTarget == DNN_TARGET_OPENCL); + } +#endif + CV_Assert(preferableBackend != DNN_BACKEND_VKCOM || + preferableTarget == DNN_TARGET_VULKAN); + CV_Assert(preferableBackend != DNN_BACKEND_CUDA || + IS_DNN_CUDA_TARGET(preferableTarget)); + CV_Assert(preferableBackend != DNN_BACKEND_TIMVX || + preferableTarget == DNN_TARGET_NPU); + + if (!netWasAllocated || this->blobsToKeep != blobsToKeep_) + { + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) +#ifndef HAVE_OPENCL + { + CV_LOG_WARNING(NULL, "DNN: OpenCL target is not available in this OpenCV build, switching to CPU."); + preferableTarget = DNN_TARGET_CPU; + } +#else + { + if (!getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()) + { + // Current implementation is only valid for GPU (#11494) + if (ocl::Device::getDefault().type() != ocl::Device::TYPE_GPU) + { + CV_LOG_WARNING(NULL, "DNN: OpenCL target is not supported with current OpenCL device (tested with GPUs only), switching to CPU."); + preferableTarget = DNN_TARGET_CPU; + } + else if (preferableTarget == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel()) + { + CV_LOG_WARNING(NULL, + "DNN: OpenCL target with fp16 precision is not supported " + "with current OpenCL device (tested with Intel GPUs only), " + "switching to OpenCL with fp32 precision."); + preferableTarget = DNN_TARGET_OPENCL; + } + } + } +#endif + if (preferableBackend == DNN_BACKEND_VKCOM && !haveVulkan()) + { + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + + if (preferableBackend == DNN_BACKEND_CUDA && !haveCUDA()) + { +#ifdef HAVE_CUDA + CV_LOG_WARNING(NULL, "unable to use CUDA backend; switching to CPU"); +#else + CV_LOG_WARNING(NULL, "DNN module was not built with CUDA backend; switching to CPU"); +#endif + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + + if (preferableBackend == DNN_BACKEND_TIMVX && !haveTimVX()) + { + preferableBackend = DNN_BACKEND_OPENCV; + preferableTarget = DNN_TARGET_CPU; + } + + clear(); + + if (hasDynamicShapes) + { + updateLayersShapes(); + } + + this->blobsToKeep = blobsToKeep_; + + allocateLayers(blobsToKeep_); + + MapIdToLayerData::iterator it = layers.find(0); + CV_Assert(it != layers.end()); + it->second.skip = netInputLayer->skip; + + initBackend(blobsToKeep_); + + if (!netWasAllocated) + { +#ifdef HAVE_HALIDE + if (preferableBackend == DNN_BACKEND_HALIDE) + compileHalide(); +#else + CV_Assert(preferableBackend != DNN_BACKEND_HALIDE); +#endif + } + + netWasAllocated = true; + + if (dumpLevel) + { + dumpNetworkToFile(); + } + } +} + + +Ptr Net::Impl::getLayer(int layerId) const +{ + LayerData& ld = getLayerData(layerId); + return ld.getLayerInstance(); +} + + +Ptr Net::Impl::getLayer(const LayerId& layerId) const +{ + LayerData& ld = getLayerData(layerId); + return ld.getLayerInstance(); +} + + +int Net::Impl::getLayerId(const String& layerName) const +{ + std::map::const_iterator it = layerNameToId.find(layerName); + return (it != layerNameToId.end()) ? it->second : -1; +} + + +int Net::Impl::getLayerId(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + return (it != layers.end()) ? id : -1; +} + + +int Net::Impl::getLayerId(DictValue& layerDesc) const +{ + if (layerDesc.isInt()) + return getLayerId(layerDesc.get()); + else if (layerDesc.isString()) + return getLayerId(layerDesc.get()); + + CV_Assert(layerDesc.isInt() || layerDesc.isString()); + return -1; +} + + +String Net::Impl::getLayerName(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + return (it != layers.end()) ? it->second.name : "(unknown layer)"; +} + + +LayerData& Net::Impl::getLayerData(int id) const +{ + MapIdToLayerData::const_iterator it = layers.find(id); + + if (it == layers.end()) + CV_Error(Error::StsObjectNotFound, format("Layer with requested id=%d not found", id)); + + return const_cast(it->second); +} + + +LayerData& Net::Impl::getLayerData(const String& layerName) const +{ + int id = getLayerId(layerName); + + if (id < 0) + CV_Error(Error::StsError, "Requested layer \"" + layerName + "\" not found"); + + return getLayerData(id); +} + + +LayerData& Net::Impl::getLayerData(const DictValue& layerDesc) const +{ + CV_Assert(layerDesc.isInt() || layerDesc.isString()); + if (layerDesc.isInt()) + return getLayerData(layerDesc.get()); + else /*if (layerDesc.isString())*/ + return getLayerData(layerDesc.get()); +} + + +/*static*/ +void Net::Impl::addLayerInput(LayerData& ld, int inNum, LayerPin from) +{ + if ((int)ld.inputBlobsId.size() <= inNum) + { + ld.inputBlobsId.resize(inNum + 1); + } + else + { + LayerPin storedFrom = ld.inputBlobsId[inNum]; + if (storedFrom.valid() && !storedFrom.equal(from)) + CV_Error(Error::StsError, format("Input #%d of layer \"%s\" already was connected", + inNum, ld.name.c_str())); + } + + ld.inputBlobsId[inNum] = from; +} + + +int Net::Impl::resolvePinOutputName(LayerData& ld, const String& outName) const +{ + if (outName.empty()) + return 0; + return ld.getLayerInstance()->outputNameToIndex(outName); +} + + +LayerPin Net::Impl::getPinByAlias(const String& layerName) const +{ + LayerPin pin; + pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName); + + if (pin.lid >= 0) + pin.oid = resolvePinOutputName(getLayerData(pin.lid), layerName); + + return pin; +} + + +std::vector Net::Impl::getLayerOutPins(const String& layerName) const +{ + int lid = (layerName.empty()) ? 0 : getLayerId(layerName); + + MapIdToLayerData::const_iterator it = layers.find(lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid", lid)); + const size_t nOutputs = it->second.outputBlobs.size(); + + std::vector pins; + for (int i = 0; i < nOutputs; i++) + { + pins.push_back(LayerPin(lid, i)); + } + + return pins; +} + + +// FIXIT remove dtype +int Net::Impl::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + int id = getLayerId(name); + if (id >= 0) + { + if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") + { + CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); + return -1; + } + else + { + LayerData& ld = layers.find(id)->second; + ld.type = type; + ld.params = params; + return -1; + } + } + + id = ++lastLayerId; + layerNameToId.insert(std::make_pair(name, id)); + layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); + if (params.get("has_dynamic_shapes", false)) + hasDynamicShapes = true; + + if (dtype == CV_8S) + netWasQuantized = true; + + return id; +} + + +int Net::Impl::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params) +{ + int prvLid = lastLayerId; + int newLid = addLayer(name, type, dtype, params); + connect(prvLid, 0, newLid, 0); + return newLid; +} + + +void Net::Impl::connect(int outLayerId, int outNum, int inLayerId, int inNum) +{ + CV_Assert(outLayerId < inLayerId); + LayerData& ldOut = getLayerData(outLayerId); + LayerData& ldInp = getLayerData(inLayerId); + + addLayerInput(ldInp, inNum, LayerPin(outLayerId, outNum)); + ldOut.requiredOutputs.insert(outNum); + ldOut.consumers.push_back(LayerPin(inLayerId, outNum)); + + CV_LOG_VERBOSE(NULL, 0, "DNN: connect(" << outLayerId << ":" << outNum << " ==> " << inLayerId << ":" << inNum << ")"); +} + + +int Net::Impl::registerOutput(const std::string& outputName, int layerId, int outputPort) +{ + int checkLayerId = getLayerId(outputName); + if (checkLayerId >= 0) + { + if (checkLayerId == layerId) + { + if (outputPort == 0) + { + // layer name correlates with its output name + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "': reuse layer with the same name and id=" << layerId << " to be linked"); + outputNameToId.insert(std::make_pair(outputName, layerId)); + return checkLayerId; + } + } + CV_Error_(Error::StsBadArg, ("Layer with name='%s' already exists id=%d (to be linked with %d:%d)", outputName.c_str(), checkLayerId, layerId, outputPort)); + } +#if 0 // TODO + if (outputPort == 0) + // make alias only, need to adopt getUnconnectedOutLayers() call +#endif + LayerParams outputLayerParams; + outputLayerParams.name = outputName; + outputLayerParams.type = "Identity"; + int dtype = CV_32F; // FIXIT remove + int outputLayerId = addLayer(outputLayerParams.name, outputLayerParams.type, dtype, outputLayerParams); + connect(layerId, outputPort, outputLayerId, 0); + CV_LOG_DEBUG(NULL, "DNN: register output='" << outputName << "' id=" << outputLayerId << " defined as " << layerId << ":" << outputPort); + outputNameToId.insert(std::make_pair(outputName, outputLayerId)); + return outputLayerId; +} + + +void Net::Impl::allocateLayer(int lid, const LayersShapesMap& layersShapes) +{ + CV_TRACE_FUNCTION(); + + LayerData& ld = layers[lid]; + + // already allocated + if (ld.flag) + return; + + size_t ninputs = ld.inputBlobsId.size(); +#if 0 + printf("layer %s:", ld.name.c_str()); + for (size_t i = 0; i < ninputs; i++) + { + int inp_lid = ld.inputBlobsId[i].lid; + LayerData &inp_ld = layers[inp_lid]; + int inp_outputs = (int)inp_ld.outputBlobs.size(); + std::cout << " " << inp_ld.name << "(" << inp_outputs; + + for( int j = 0; j < inp_outputs; j++ ) + { + std::cout << (j == 0 ? ": " : ", ") << inp_ld.outputBlobs[j].size; + } + std::cout << ")"; + } + printf("\n"); +#endif + + // determine parent layers + for (size_t i = 0; i < ninputs; i++) + ld.inputLayersId.insert(ld.inputBlobsId[i].lid); + + // allocate parents + for (std::set::const_iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++) + allocateLayer(*i, layersShapes); + + // bind inputs + if (ld.id == 0) // DataLayer + { + ninputs = netInputLayer->inputsData.size(); + ld.inputBlobsWrappers.resize(ninputs); + for (size_t i = 0; i < ninputs; i++) + ld.inputBlobsWrappers[i] = wrap(netInputLayer->inputsData[i]); + } + else + { + ld.inputBlobs.resize(ninputs); + ld.inputBlobsWrappers.resize(ninputs); + for (size_t i = 0; i < ninputs; i++) + { + LayerPin from = ld.inputBlobsId[i]; + CV_Assert(from.valid()); + CV_DbgAssert(layers.count(from.lid) && (int)layers[from.lid].outputBlobs.size() > from.oid); + ld.inputBlobs[i] = &layers[from.lid].outputBlobs[from.oid]; + ld.inputBlobsWrappers[i] = layers[from.lid].outputBlobsWrappers[from.oid]; + } + } + + LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); + + CV_Assert(layerShapesIt != layersShapes.end()); + + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F) + ld.dtype = CV_16S; + + std::vector pinsForInternalBlobs; + blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); + ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); + for (int i = 0; i < ld.outputBlobs.size(); ++i) + ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); + + /* CUDA backend has its own system for internal blobs; we don't need these */ + ld.internalBlobsWrappers.resize((preferableBackend == DNN_BACKEND_CUDA || preferableBackend == DNN_BACKEND_TIMVX) ? 0 : ld.internals.size()); + for (int i = 0; i < ld.internalBlobsWrappers.size(); ++i) + ld.internalBlobsWrappers[i] = wrap(ld.internals[i]); + + Ptr layerPtr = ld.getLayerInstance(); + { + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + { + inps[i] = *ld.inputBlobs[i]; + } + layerPtr->finalize(inps, ld.outputBlobs); + layerPtr->preferableTarget = preferableTarget; +#if 0 + std::cout << "\toutputs:"; + size_t noutputs = ld.outputBlobs.size(); + for (size_t j = 0; j < noutputs; j++) + { + std::cout << (j == 0 ? " " : ", ") << ld.outputBlobs[j].size; + } + std::cout << "\n"; +#endif + } + + // After allocation of layer, we decrease counters to it's input blobs. + blobManager.releaseReferences(ld.inputBlobsId); + blobManager.releaseReferences(pinsForInternalBlobs); + + ld.flag = 1; +} + + +void Net::Impl::allocateLayers(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + it->second.flag = 0; + + CV_Assert(!layers[0].outputBlobs.empty()); + ShapesVec inputShapes; + for (int i = 0; i < layers[0].outputBlobs.size(); i++) + { + Mat& inp = layers[0].outputBlobs[i]; + CV_Assert(inp.total()); + if (preferableBackend == DNN_BACKEND_OPENCV && + preferableTarget == DNN_TARGET_OPENCL_FP16 && + layers[0].dtype == CV_32F) + { + layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); + } + inputShapes.push_back(shape(inp)); + } + LayersShapesMap layersShapes; + getLayersShapes(inputShapes, layersShapes); + + blobManager.reset(); + backendWrappers.clear(); + + for (auto& layer : layers) + { + auto& ld = layer.second; + ld.inputBlobsWrappers.clear(); + ld.outputBlobsWrappers.clear(); + ld.internalBlobsWrappers.clear(); + } + + // Fake references to input blobs. + for (int i = 0; i < layers[0].outputBlobs.size(); ++i) + blobManager.addReference(LayerPin(0, i)); + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + blobManager.addReferences(ld.inputBlobsId); + } + + for (int i = 0; i < blobsToKeep_.size(); i++) + { + blobManager.addReference(blobsToKeep_[i]); + } + + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + allocateLayer(lid, layersShapes); + } + + layersTimings.resize(lastLayerId + 1, 0); + fuseLayers(blobsToKeep_); +} + + +void Net::Impl::forwardLayer(LayerData& ld) +{ + CV_TRACE_FUNCTION(); + + Ptr layer = ld.layerInstance; + + if (!ld.skip) + { + TickMeter tm; + tm.start(); + +#ifndef HAVE_VULKAN + std::map>::const_iterator it = ld.backendNodes.find(preferableBackend); +#else + std::map>::iterator it = ld.backendNodes.find(preferableBackend); +#endif + if (preferableBackend == DNN_BACKEND_OPENCV || it == ld.backendNodes.end() || it->second.empty()) + { + if (isAsync) + CV_Error(Error::StsNotImplemented, "Default implementation fallbacks in asynchronous mode"); + + if (!layer->supportBackend(DNN_BACKEND_OPENCV)) + CV_Error(Error::StsNotImplemented, format("Layer \"%s\" of type \"%s\" unsupported on OpenCV backend", + ld.name.c_str(), ld.type.c_str())); + +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers); + std::vector umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + std::vector umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers); + layer->forward(umat_inputBlobs, + umat_outputBlobs, + umat_internalBlobs); + if (getParam_DNN_CHECK_NAN_INF()) + { + bool fail = false; + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, -1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < umat_inputBlobs.size(); ++i) + { + UMat& u = umat_inputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_outputBlobs.size(); ++i) + { + UMat& u = umat_outputBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < umat_internalBlobs.size(); ++i) + { + UMat& u = umat_internalBlobs[i]; + Mat m; + if (u.depth() == CV_16S) // FP16 + convertFp16(u, m); + else + m = u.getMat(ACCESS_READ); + std::cout << "INTERNAL " << i << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl; + } + if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()) + CV_Assert(!fail); + } + } + OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs); + } + else +#endif + { + for (int i = 0, n = ld.inputBlobsWrappers.size(); i < n; ++i) + { + if (!ld.inputBlobsWrappers[i].empty()) + ld.inputBlobsWrappers[i]->copyToHost(); + } + + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + { + inps[i] = *ld.inputBlobs[i]; + } + layer->forward(inps, ld.outputBlobs, ld.internals); + + if (getParam_DNN_CHECK_NAN_INF()) + { + bool fail = false; + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + if (!checkRange(m)) + { + std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + else if (!checkRange(m, true, NULL, -1e6, 1e6)) + { + std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl; + std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl; + fail = true; + } + } + if (fail) + { + for (size_t i = 0; i < ld.inputBlobs.size(); ++i) + { + const Mat* pM = ld.inputBlobs[i]; + if (!pM) + { + std::cout << "INPUT " << i << " is NULL" << std::endl; + continue; + } + const Mat& m = *pM; + std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.outputBlobs.size(); ++i) + { + const Mat& m = ld.outputBlobs[i]; + std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + for (size_t i = 0; i < ld.internals.size(); ++i) + { + const Mat& m = ld.internals[i]; + std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl; + if (getParam_DNN_CHECK_NAN_INF_DUMP()) std::cout << m.reshape(1, 1) << std::endl; + } + if (getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()) + CV_Assert(!fail); + } + } + + for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i) + { + if (!ld.outputBlobsWrappers[i].empty()) + ld.outputBlobsWrappers[i]->setHostDirty(); + } + } + } + else + { + Ptr node = it->second; + CV_Assert(!node.empty()); + if (preferableBackend == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); + +#ifdef HAVE_CUDA + Ptr cudaNode = node.dynamicCast(); + CV_Assert(!cudaNode.empty()); + + cudaNode->forward(ld.inputBlobsWrappers, ld.outputBlobsWrappers, cudaInfo->workspace); + + for (auto id : ld.cudaD2HBackgroundTransfers) + { + auto wrapper = ld.outputBlobsWrappers[id].dynamicCast(); + wrapper->copyToHostInBackground(); + } +#endif + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { + forwardHalide(ld.outputBlobsWrappers, node); + } +#ifdef HAVE_INF_ENGINE + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + forwardNgraph(ld.outputBlobsWrappers, node, isAsync); + } +#endif + else if (preferableBackend == DNN_BACKEND_WEBNN) + { + forwardWebnn(ld.outputBlobsWrappers, node, isAsync); + } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { + forwardTimVX(ld.outputBlobsWrappers, node); + } +#ifdef HAVE_VULKAN + else if (preferableBackend == DNN_BACKEND_VKCOM) + { + try + { + forwardVkCom(ld.outputBlobsWrappers, node); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "forwardVkCom failed, fallback to CPU implementation. " << e.what()); + it->second = Ptr(); + forwardLayer(ld); + } + } +#endif + else + { + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + } + } + + tm.stop(); + int64 t = tm.getTimeTicks(); + layersTimings[ld.id] = (t > 0) ? t : t + 1; // zero for skipped layers only + } + else + { + layersTimings[ld.id] = 0; + } + + ld.flag = 1; +} + + +void Net::Impl::forwardToLayer(LayerData& ld, bool clearFlags) +{ + CV_TRACE_FUNCTION(); + + if (clearFlags) + { + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + it->second.flag = 0; + } + + // already was forwarded + if (ld.flag) + return; + + // forward parents + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end() && (it->second.id < ld.id); ++it) + { + LayerData& ld = it->second; + if (ld.flag) + continue; + forwardLayer(ld); + } + + // forward itself + forwardLayer(ld); + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + cudaInfo->context.stream.synchronize(); +#endif +} + + +Mat Net::Impl::forward(const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + forwardToLayer(getLayerData(layerName)); + + return getBlob(layerName); +} + + +AsyncArray Net::Impl::forwardAsync(const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + +#ifdef CV_CXX11 + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + + if (preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward is supported for Inference Engine backend only"); + + isAsync = true; + forwardToLayer(getLayerData(layerName)); + isAsync = false; + + return getBlobAsync(layerName); +#else + CV_Error(Error::StsNotImplemented, "DNN: Asynchronous forward requires build with enabled C++11"); +#endif // CV_CXX11 +} + + +void Net::Impl::forward(OutputArrayOfArrays outputBlobs, const String& outputName) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + String layerName = outputName; + + if (layerName.empty()) + { + std::vector layerNames = getLayerNames(); + CV_Assert(!layerNames.empty()); + layerName = layerNames.back(); + } + + std::vector pins(1, getPinByAlias(layerName)); + setUpNet(pins); + forwardToLayer(getLayerData(layerName)); + + LayerPin pin = getPinByAlias(layerName); + LayerData& ld = layers[pin.lid]; + + if (outputBlobs.isUMat()) + { + getBlob(layerName).copyTo(outputBlobs); + } + else if (outputBlobs.isMat()) + { + outputBlobs.assign(getBlob(layerName)); + } + else if (outputBlobs.isMatVector()) + { + if (preferableTarget != DNN_TARGET_CPU) + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + CV_Assert(!ld.outputBlobsWrappers[i].empty()); + ld.outputBlobsWrappers[i]->copyToHost(); + } + } + if (ld.outputBlobs[0].depth() == CV_16S) + { + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + outputvec.resize(ld.outputBlobs.size()); + for (int i = 0; i < outputvec.size(); i++) + convertFp16(ld.outputBlobs[i], outputvec[i]); + } + else + { + // Output depth can be CV_32F or CV_8S + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + outputvec = ld.outputBlobs; + } + } + else if (outputBlobs.isUMatVector()) + { + std::vector& outputvec = *(std::vector*)outputBlobs.getObj(); + +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + if (preferableTarget == DNN_TARGET_OPENCL) + outputvec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + else if (preferableTarget == DNN_TARGET_OPENCL_FP16) + { + std::vector out_vec = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + outputvec.resize(out_vec.size()); + for (int i = 0; i < out_vec.size(); i++) + convertFp16(out_vec[i], outputvec[i]); + } + } + else +#endif + { + outputvec.resize(ld.outputBlobs.size()); + for (int i = 0; i < outputvec.size(); ++i) + ld.outputBlobs[i].copyTo(outputvec[i]); + } + } +} + + +void Net::Impl::forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + std::vector pins; + for (int i = 0; i < outBlobNames.size(); i++) + { + pins.push_back(getPinByAlias(outBlobNames[i])); + } + + setUpNet(pins); + + LayerPin out = getLatestLayerPin(pins); + + forwardToLayer(getLayerData(out.lid)); + + std::vector matvec; + for (int i = 0; i < pins.size(); i++) + { + matvec.push_back(getBlob(pins[i])); + } + + outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector + outputBlobs.assign(matvec); +} + + +void Net::Impl::forward(std::vector>& outputBlobs, + const std::vector& outBlobNames) +{ + CV_Assert(!empty()); + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + std::vector pins; + for (int i = 0; i < outBlobNames.size(); i++) + { + pins.push_back(getPinByAlias(outBlobNames[i])); + } + + setUpNet(pins); + + LayerPin out = getLatestLayerPin(pins); + + forwardToLayer(getLayerData(out.lid)); + + outputBlobs.resize(outBlobNames.size()); + for (int i = 0; i < outBlobNames.size(); i++) + { + std::vector lp = getLayerOutPins(outBlobNames[i]); + outputBlobs[i].resize(lp.size()); + for (int j = 0; j < lp.size(); j++) + { + outputBlobs[i][j] = getBlob(lp[j]); + } + } +} + + +void Net::Impl::getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) +{ + CV_CheckGE(id, 0, ""); + CV_CheckLT(id, (int)layers.size(), ""); + LayerData& layerData = layers[id]; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = inOutShapes[id]; + + if (id == 0 && layerShapes.in[0].empty()) + { + if (!layerData.outputBlobs.empty()) + { + ShapesVec shapes; + for (int i = 0; i < layerData.outputBlobs.size(); i++) + { + Mat& inp = layerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + shapes.push_back(shape(inp)); + } + layerShapes.in = shapes; + } + else + { + const std::vector& inputShapes = netInputLayer->shapes; + bool none = true; + for (size_t i = 0; i < inputShapes.size(); i++) + { + if (!inputShapes[i].empty()) + { + none = false; + break; + } + } + if (none) + { + layerShapes.out.clear(); + return; + } + else + { + layerShapes.in = inputShapes; + } + } + } + + if (layerShapes.in.empty()) + { + for (int i = 0; i < inputLayerIds.size(); i++) + { + int layerId = inputLayerIds[i].lid; + LayersShapesMap::const_iterator it = inOutShapes.find(layerId); + if (it == inOutShapes.end() || it->second.out.empty()) + { + getLayerShapesRecursively(layerId, inOutShapes); + it = inOutShapes.find(layerId); + CV_Assert(it != inOutShapes.end()); + } + const int out_port = inputLayerIds[i].oid; + CV_CheckLT(out_port, (int)it->second.out.size(), ""); + const MatShape& shape = it->second.out[out_port]; + layerShapes.in.push_back(shape); + } + } + const ShapesVec& is = layerShapes.in; + ShapesVec& os = layerShapes.out; + ShapesVec& ints = layerShapes.internal; + int requiredOutputs = layerData.requiredOutputs.size(); + Ptr l = layerData.getLayerInstance(); + CV_Assert(l); + bool layerSupportInPlace = false; + try + { + layerSupportInPlace = l->getMemoryShapes(is, requiredOutputs, os, ints); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() throws exception." << + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size()); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } + layerShapes.supportInPlace = layerSupportInPlace; + + try + { + for (int i = 0; i < ints.size(); i++) + CV_CheckGT(total(ints[i]), 0, ""); + + for (int i = 0; i < os.size(); i++) + CV_CheckGT(total(os[i]), 0, ""); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size() << + " inplace=" << layerSupportInPlace); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } +} + +void Net::Impl::getLayersShapes( + const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) /*const*/ +{ + layersIds.clear(); + inLayersShapes.clear(); + outLayersShapes.clear(); + + Impl::LayersShapesMap inOutShapes; + getLayersShapes(netInputShapes, inOutShapes); + + for (Impl::LayersShapesMap::const_iterator it = inOutShapes.begin(); + it != inOutShapes.end(); it++) + { + layersIds.push_back(it->first); + inLayersShapes.push_back(it->second.in); + outLayersShapes.push_back(it->second.out); + } +} + + +void Net::Impl::getLayersShapes(const ShapesVec& netInputShapes, + LayersShapesMap& inOutShapes) +{ + inOutShapes.clear(); + + inOutShapes[0].in = netInputShapes; // insert shape for first input layer + for (MapIdToLayerData::const_iterator it = layers.begin(); + it != layers.end(); it++) + { + getLayerShapesRecursively(it->first, inOutShapes); + } +} + +void Net::Impl::getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + LayerShapes& shapes) +{ + LayersShapesMap inOutShapes; + inOutShapes[0].in = netInputShapes; // insert shape for first input layer + getLayerShapesRecursively(layerId, inOutShapes); + shapes = inOutShapes[layerId]; +} + +void Net::Impl::updateLayersShapes() +{ + CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); + CV_Assert(netInputLayer); + DataLayer& inputLayer = *netInputLayer; + LayerData& inputLayerData = layers[0]; + CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); + CV_Assert(!inputLayerData.outputBlobs.empty()); + ShapesVec inputShapes; + for (int i = 0; i < inputLayerData.outputBlobs.size(); i++) + { + Mat& inp = inputLayerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation + preferableTarget == DNN_TARGET_OPENCL_FP16 && + inputLayerData.dtype == CV_32F) + { + inp.create(inp.dims, inp.size, CV_16S); + } + inputShapes.push_back(shape(inp)); + } + CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); + LayersShapesMap layersShapes; + layersShapes[0].in = inputShapes; + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + int layerId = it->first; + LayerData& layerData = it->second; + const std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = layersShapes[layerId]; + CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); + if (layerShapes.in.empty()) + { + for (int i = 0; i < inputLayerIds.size(); i++) + { + const LayerPin& inputPin = inputLayerIds[i]; + int inputLayerId = inputPin.lid; + CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); + LayersShapesMap::const_iterator inputIt = layersShapes.find(inputLayerId); + if (inputIt == layersShapes.end() || inputIt->second.out.empty()) + { + getLayerShapesRecursively(inputLayerId, layersShapes); + } + const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; + layerShapes.in.push_back(shape); + } + layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in); + } + CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes")); + CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes")); + } + CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE"); +} + + +LayerPin Net::Impl::getLatestLayerPin(const std::vector& pins) const +{ + return *std::max_element(pins.begin(), pins.end()); +} + +Mat Net::Impl::getBlob(const LayerPin& pin) const +{ + CV_TRACE_FUNCTION(); + + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob not found"); + + MapIdToLayerData::const_iterator it = layers.find(pin.lid); + if (it == layers.end()) + CV_Error_(Error::StsOutOfRange, ("Layer #%d is not valid (output #%d requested)", pin.lid, pin.oid)); + + const LayerData& ld = it->second; + if ((size_t)pin.oid >= ld.outputBlobs.size()) + { + CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %zu outputs, " + "the #%d was requested", + ld.name.c_str(), ld.outputBlobs.size(), pin.oid)); + } + if (preferableTarget != DNN_TARGET_CPU) + { + CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); + // Transfer data to CPU if it's require. + ld.outputBlobsWrappers[pin.oid]->copyToHost(); + } + + if (ld.outputBlobs[pin.oid].depth() == CV_16S) + { + Mat output_blob; + convertFp16(ld.outputBlobs[pin.oid], output_blob); + return output_blob; + } + else + return ld.outputBlobs[pin.oid]; +} + +Mat Net::Impl::getBlob(String outputName) const +{ + return getBlob(getPinByAlias(outputName)); +} + + +AsyncArray Net::Impl::getBlobAsync(const LayerPin& pin) +{ + CV_TRACE_FUNCTION(); +#ifdef HAVE_INF_ENGINE + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob not found"); + + LayerData& ld = layers[pin.lid]; + if ((size_t)pin.oid >= ld.outputBlobs.size()) + { + CV_Error(Error::StsOutOfRange, format("Layer \"%s\" produce only %d outputs, " + "the #%d was requested", + ld.name.c_str(), (int)ld.outputBlobs.size(), (int)pin.oid)); + } + if (preferableTarget != DNN_TARGET_CPU) + { + CV_Assert(!ld.outputBlobsWrappers.empty() && !ld.outputBlobsWrappers[pin.oid].empty()); + // Transfer data to CPU if it's require. + ld.outputBlobsWrappers[pin.oid]->copyToHost(); + } + CV_Assert(preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + + Ptr wrapper = ld.outputBlobsWrappers[pin.oid].dynamicCast(); + return std::move(wrapper->futureMat); +#else + CV_Error(Error::StsNotImplemented, "DNN: OpenVINO/nGraph backend is required"); +#endif // HAVE_INF_ENGINE +} + + +AsyncArray Net::Impl::getBlobAsync(String outputName) +{ + return getBlobAsync(getPinByAlias(outputName)); +} + + +void Net::Impl::setInputsNames(const std::vector& inputBlobNames) +{ + CV_Assert(netInputLayer); + netInputLayer->setNames(inputBlobNames); +} + + +void Net::Impl::setInputShape(const String& inputName, const MatShape& shape) +{ + CV_Assert(netInputLayer); + netInputLayer->setInputShape(inputName, shape); +} + + +void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean) +{ + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + LayerPin pin; + pin.lid = 0; + pin.oid = resolvePinOutputName(getLayerData(pin.lid), name); + + if (!pin.valid()) + CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found"); + + Mat blob_ = blob.getMat(); // can't use InputArray directly due MatExpr stuff + MatShape blobShape = shape(blob_); + + if (pin.lid == 0) + { + CV_Assert(!netInputLayer.empty()); + const DataLayer& netInputLayer = *(this->netInputLayer); + if (!netInputLayer.shapes.empty()) + { + CV_CheckLT(pin.oid, (int)netInputLayer.shapes.size(), ""); + const MatShape& inputShapeLimitation = netInputLayer.shapes[pin.oid]; + if (!inputShapeLimitation.empty()) + { + CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), ""); +#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 + const size_t dims = inputShapeLimitation.size(); + for (size_t dim = 0; dim < dims; dim++) + { + if (dims >= 3 && dim == 0 && inputShapeLimitation[0] == 1) + continue; // don't limit batch + CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], ""); + } +#endif + } + } + } + + LayerData& ld = layers[pin.lid]; + const int numInputs = std::max(pin.oid + 1, (int)ld.requiredOutputs.size()); + ld.outputBlobs.resize(numInputs); + ld.outputBlobsWrappers.resize(numInputs); + netInputLayer->inputsData.resize(numInputs); + netInputLayer->scaleFactors.resize(numInputs); + netInputLayer->means.resize(numInputs); + + MatShape prevShape = shape(netInputLayer->inputsData[pin.oid]); + bool oldShape = prevShape == blobShape; + + blob_.copyTo(netInputLayer->inputsData[pin.oid]); + if (!oldShape) + ld.outputBlobs[pin.oid] = netInputLayer->inputsData[pin.oid]; + + if (!ld.outputBlobsWrappers[pin.oid].empty()) + { + ld.outputBlobsWrappers[pin.oid]->setHostDirty(); + } + netInputLayer->scaleFactors[pin.oid] = scalefactor; + netInputLayer->means[pin.oid] = mean; + netWasAllocated = netWasAllocated && oldShape; +} + + +Mat Net::Impl::getParam(int layer, int numParam) const +{ + LayerData& ld = getLayerData(layer); + std::vector& layerBlobs = ld.getLayerInstance()->blobs; + CV_Assert(numParam < (int)layerBlobs.size()); + return layerBlobs[numParam]; +} + +void Net::Impl::setParam(int layer, int numParam, const Mat& blob) +{ + LayerData& ld = getLayerData(layer); + + std::vector& layerBlobs = ld.getLayerInstance()->blobs; + CV_Assert(numParam < (int)layerBlobs.size()); + // we don't make strong checks, use this function carefully + layerBlobs[numParam] = blob; +} + + +static +string dumpLayerParameterSize(const string& name, const LayerParams& lp) +{ + std::ostringstream out(name, std::ios::ate); + DictValue param = lp.get(name); + switch (param.size()) + { + case 1: out << " : "; break; + case 2: out << " (HxW): "; break; + case 3: out << " (DxHxW): "; break; + default: + CV_LOG_INFO(NULL, format("DNN/dumpLayerParameterSize(): Unsupported '%s' size = %d", name.c_str(), param.size())); + out << ": "; + } + for (size_t i = 0; i < param.size(); i++) + { + if (i > 0) + out << " x "; + out << param.get(i); + } + return out.str(); +} + +string Net::Impl::dump(bool forceAllocation) const +{ + bool hasInput = !netInputLayer->inputsData.empty(); + if (forceAllocation) + { + if (!netWasAllocated) + const_cast(this)->setUpNet(); + } + + std::ostringstream out; + const std::map& map = layers; + + Backend prefBackend = (Backend)preferableBackend; + std::vector> skippedLayers; + std::vector skipId; + std::vector allLayers(map.size(), -1); + int idPrev = -1; + Ptr prevNode; + for (std::map::const_reverse_iterator rit = map.rbegin(); rit != map.rend(); ++rit) + { + std::map>::const_iterator itBackend = rit->second.backendNodes.find(prefBackend); + if (prefBackend == DNN_BACKEND_OPENCV || itBackend == rit->second.backendNodes.end() || itBackend->second.empty()) + { + if (rit->second.skip) + skipId.push_back(rit->first); + else if (!skipId.empty()) + { + if (prefBackend == DNN_BACKEND_OPENCV || prevNode.empty()) + skipId.push_back(rit->first); + else if (idPrev != -1) + skipId.push_back(idPrev); + + std::sort(skipId.begin(), skipId.end()); + for (int i = 0; i < skipId.size(); i++) + { + allLayers[skipId[i]] = skippedLayers.size(); + } + skippedLayers.push_back(skipId); + skipId.clear(); + } + } + else + { + if (itBackend->second == prevNode) + skipId.push_back(idPrev); + else if (!skipId.empty()) + { + skipId.push_back(idPrev); + std::sort(skipId.begin(), skipId.end()); + for (int i = 0; i < skipId.size(); i++) + { + allLayers[skipId[i]] = skippedLayers.size(); + } + skippedLayers.push_back(skipId); + skipId.clear(); + } + idPrev = rit->first; + prevNode = itBackend->second; + } + } + std::vector colors = { "#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff", "#b266ff", "#3cb371"}; + string backend; + switch (prefBackend) + { + case DNN_BACKEND_DEFAULT: backend = "DEFAULT/"; break; + case DNN_BACKEND_HALIDE: backend = "HALIDE/"; break; + case DNN_BACKEND_INFERENCE_ENGINE: // fallthru + case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: // fallthru + case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: backend = "OpenVINO/"; break; + case DNN_BACKEND_OPENCV: backend = "OCV/"; break; + case DNN_BACKEND_VKCOM: backend = "VULKAN/"; break; + case DNN_BACKEND_CUDA: backend = "CUDA/"; break; + case DNN_BACKEND_WEBNN: backend = "WEBNN/"; break; + case DNN_BACKEND_TIMVX: backend = "TIMVX/"; break; + // don't use default: + } + out << "digraph G {\n"; + // Add nodes + for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) + { + const LayerData& ld = it->second; + string name = ld.params.name; + std::vector clusterIds(1, it->first); + if (allLayers[it->first] == -1 && !name.empty()) + { + out << "\t\"" << name << "\" [label=\""; + } + else if (name.empty() || it->first != skippedLayers[allLayers[it->first]][0]) + { + continue; + } + else // first node in cluster : it->first == skippedLayers[allLayers[it->first]][0] + { + int cluster = allLayers[it->first]; + out << "\t\"" + << "cluster_" << cluster << "\" [label=\"{"; + clusterIds = skippedLayers[allLayers[it->first]]; // vertices in current cluster + } + for (int i = 0; i < clusterIds.size(); i++) + { + CV_DbgAssert(map.find(clusterIds[i]) != map.end()); + const LayerParams& lp = map.find(clusterIds[i])->second.params; + if (!lp.name.empty()) + { + if (i > 0) + { + out << " | "; + } + out << lp.name << "\\n" + << lp.type << "\\n"; // align center + if (lp.has("kernel_size")) + { + string kernel = dumpLayerParameterSize("kernel_size", lp); + out << kernel; + out << "\\l"; // align left + } + else if (lp.has("kernel_h") && lp.has("kernel_w")) + { + DictValue h = lp.get("kernel_h"); + DictValue w = lp.get("kernel_w"); + out << "kernel (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("stride")) + { + string stride = dumpLayerParameterSize("stride", lp); + out << stride; + out << "\\l"; // align left + } + else if (lp.has("stride_h") && lp.has("stride_w")) + { + DictValue h = lp.get("stride_h"); + DictValue w = lp.get("stride_w"); + out << "stride (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("dilation")) + { + string dilation = dumpLayerParameterSize("dilation", lp); + out << dilation; + out << "\\l"; // align left + } + else if (lp.has("dilation_h") && lp.has("dilation_w")) + { + DictValue h = lp.get("dilation_h"); + DictValue w = lp.get("dilation_w"); + out << "dilation (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("pad")) + { + DictValue pad = lp.get("pad"); + out << "pad "; + switch (pad.size()) + { + case 1: out << ": " << pad; break; + case 2: + out << "(HxW): (" << pad.get(0) << " x " << pad.get(1) << ")"; + break; + case 4: + out << "(HxW): (" << pad.get(0) << ", " << pad.get(2) + << ") x (" << pad.get(1) << ", " << pad.get(3) << ")"; + break; + case 6: + out << "(DxHxW): (" << pad.get(0) << ", " << pad.get(3) + << ") x (" << pad.get(1) << ", " << pad.get(4) + << ") x (" << pad.get(2) << ", " << pad.get(5) << ")"; + break; + default: CV_Error(Error::StsNotImplemented, format("Unsupported pad size = %d", pad.size())); + } + out << "\\l"; // align left + } + else if (lp.has("pad_l") && lp.has("pad_t") && lp.has("pad_r") && lp.has("pad_b")) + { + DictValue l = lp.get("pad_l"); + DictValue t = lp.get("pad_t"); + DictValue r = lp.get("pad_r"); + DictValue b = lp.get("pad_b"); + out << "pad (HxW): (" << t << ", " << b << ") x (" << l << ", " << r << ")"; + out << "\\l"; // align left + } + else if (lp.has("pooled_w") || lp.has("pooled_h")) + { + DictValue h = lp.get("pooled_h"); + DictValue w = lp.get("pooled_w"); + out << "pad pooled (HxW): " << h << " x " << w; + out << "\\l"; // align left + } + if (lp.has("pool")) + { + out << "pool: " << lp.get("pool"); + out << "\\l"; // align left + } + if (lp.has("global_pooling")) + { + out << "global_pooling: " << lp.get("global_pooling"); + out << "\\l"; // align left + } + if (lp.has("group")) + { + out << "group: " << lp.get("group"); + out << "\\l"; // align left + } + } + } + if (!ld.outputBlobs.empty()) + { + out << "output: " << ld.outputBlobs[0].size; + out << "\\l"; // align left + } + + Ptr layerBackend; + std::map>::const_iterator ibn = ld.backendNodes.find(prefBackend); + if (ibn != ld.backendNodes.end()) + layerBackend = ibn->second; + out << (!layerBackend.empty() ? backend : "OCV/"); + int colorId = 0; + const Target target = ld.layerInstance.empty() + ? DNN_TARGET_CPU + : (Target)(ld.layerInstance->preferableTarget); // TODO fix preferableTarget type + switch (target) + { + case DNN_TARGET_CPU: + out << "CPU"; + colorId = layerBackend.empty() ? 0 : 5; + break; + case DNN_TARGET_OPENCL: + out << "OCL"; + colorId = 1; + break; + case DNN_TARGET_OPENCL_FP16: + out << "OCL_FP16"; + colorId = 2; + break; + case DNN_TARGET_MYRIAD: + out << "MYRIAD"; + colorId = 3; + break; + case DNN_TARGET_HDDL: + out << "HDDL"; + colorId = 8; + break; + case DNN_TARGET_VULKAN: + out << "VULKAN"; + colorId = 7; + break; + case DNN_TARGET_FPGA: + out << "FPGA"; + colorId = 4; + break; + case DNN_TARGET_CUDA: + out << "CUDA"; + colorId = 5; + break; + case DNN_TARGET_CUDA_FP16: + out << "CUDA_FP16"; + colorId = 6; + break; + case DNN_TARGET_NPU: + out << "NPU"; + colorId = 9; + break; + // don't use default: + } + CV_Assert(colorId < colors.size()); + out << "\\n"; // align center + out << ((clusterIds.size() == 1) ? "\" " : " }\" "); + out << "fillcolor=\"" << colors[colorId] << "\" "; + out << "style=filled "; + out << "shape=" << ((clusterIds.size() == 1) ? "box" : "record") << "]\n"; + } + out << '\n'; + // Add edges + int inputsSize = hasInput ? netInputLayer->outNames.size() : 0; + for (std::map::const_iterator it = map.begin(); it != map.end(); ++it) + { + const LayerData& ld = it->second; + if (allLayers[it->first] == -1) // node + { + for (int i = 0; i < ld.consumers.size(); i++) + { + int outId = ld.consumers[i].lid; + if (it == map.begin() && inputsSize > 1) + out << "\t\"" << ld.name << "_" << i << "\"" + << " -> "; + else + out << "\t\"" << ld.name << "\"" + << " -> "; + if (allLayers[outId] == -1) // node + { + CV_DbgAssert(map.find(outId) != map.end()); + out << "\"" << map.find(outId)->second.name << "\"\n"; + } + else // cluster + { + out << "\"" + << "cluster_" << allLayers[outId] << "\"\n"; + } + } + } + else if (it->first == skippedLayers[allLayers[it->first]].back()) // edges from last layer in cluster + { + for (int i = 0; i < ld.consumers.size(); i++) + { + int outId = ld.consumers[i].lid; + if (allLayers[outId] == -1) // node + { + CV_DbgAssert(map.find(outId) != map.end()); + out << "\t\"" + << "cluster_" << allLayers[it->first] << "\"" + << " -> "; + out << "\"" << map.find(outId)->second.name << "\"\n"; + } + else if (allLayers[outId] != allLayers[it->first]) + { // another cluster + out << "\t\"" + << "cluster_" << allLayers[it->first] << "\"" + << " -> "; + out << "\"" + << "cluster_" << allLayers[outId] << "\"\n"; + } + } + } + } + out << "}\n"; + return out.str(); +} + + +void Net::Impl::dumpNetworkToFile() const +{ +#ifndef OPENCV_DNN_DISABLE_NETWORK_AUTO_DUMP + string dumpFileNameBase = getDumpFileNameBase(); + string dumpFileName = dumpFileNameBase + ".dot"; + try + { + string dumpStr = dump(); + std::ofstream out(dumpFileName.c_str(), std::ios::out | std::ios::binary); + out << dumpStr; + } + catch (const std::exception& e) + { + std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); + out << "Exception: " << e.what() << std::endl; + } + catch (...) + { + std::ofstream out((dumpFileName + ".error").c_str(), std::ios::out); + out << "Can't dump: unknown exception" << std::endl; + } +#endif +} + + +std::vector> Net::Impl::getLayerInputs(int layerId) const +{ + LayerData& ld = getLayerData(layerId); + + std::vector> inputLayers; + inputLayers.reserve(ld.inputBlobsId.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid)); + } + return inputLayers; +} + +std::vector Net::Impl::getLayerNames() const +{ + std::vector res; + res.reserve(layers.size()); + + Impl::MapIdToLayerData::const_iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + if (it->second.id) // skip Data layer + res.push_back(it->second.name); + } + + return res; +} + + +// FIXIT drop "unconnected" API +std::vector Net::Impl::getUnconnectedOutLayers() const +{ + std::vector layersIds; + + // registerOutput() flow + if (!outputNameToId.empty()) + { + for (std::map::const_iterator it = outputNameToId.begin(); it != outputNameToId.end(); ++it) + { + layersIds.push_back(it->second); + } + return layersIds; + } + + Impl::MapIdToLayerData::const_iterator it; + for (it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + const LayerData& ld = it->second; + + if (ld.requiredOutputs.size() == 0) + layersIds.push_back(lid); + } + + return layersIds; +} + + +// FIXIT drop "unconnected" API +std::vector Net::Impl::getUnconnectedOutLayersNames() /*const*/ +{ + std::vector ids = getUnconnectedOutLayers(); + const size_t n = ids.size(); + std::vector names(n); + for (size_t i = 0; i < n; ++i) + { + names[i] = layers[ids[i]].name; + } + return names; +} + + +int64 Net::Impl::getFLOPS(const std::vector& netInputShapes) /*const*/ +{ + int64 flops = 0; + std::vector ids; + std::vector> inShapes, outShapes; + getLayersShapes(netInputShapes, ids, inShapes, outShapes); + CV_Assert(inShapes.size() == outShapes.size()); + CV_Assert(inShapes.size() == ids.size()); + + for (int i = 0; i < ids.size(); i++) + { + flops += layers[ids[i]].getLayerInstance()->getFLOPS(inShapes[i], outShapes[i]); + } + + return flops; +} + + +int64 Net::Impl::getFLOPS( + const int layerId, + const std::vector& netInputShapes) /*const*/ +{ + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId); + CV_Assert(layer != layers.end()); + + LayerShapes shapes; + getLayerShapes(netInputShapes, layerId, shapes); + + return const_cast(layer->second).getLayerInstance()->getFLOPS(shapes.in, shapes.out); +} + + +void Net::Impl::getMemoryConsumption( + const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/ +{ + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerId); + CV_Assert(layer != layers.end()); + + weights = blobs = 0; + + for (int i = 0; i < layer->second.params.blobs.size(); i++) + { + const Mat& weightsBlob = layer->second.params.blobs[i]; + weights += weightsBlob.total() * weightsBlob.elemSize(); + } + + LayerShapes shapes; + getLayerShapes(netInputShapes, layerId, shapes); + const ShapesVec& outLayerShapes = shapes.out; + + // FIXIT netWasQuantized check is not enough - per layer check should be done + size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float); + for (int i = 0; i < outLayerShapes.size(); i++) + { + blobs += total(outLayerShapes[i]) * elemSize; + } +} + + +void Net::Impl::getMemoryConsumption( + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/ +{ + std::vector layerIds; + std::vector w, b; + getMemoryConsumption(netInputShapes, layerIds, w, b); + + weights = blobs = 0; + for (int i = 0; i < layerIds.size(); i++) + { + weights += w[i]; + blobs += b[i]; + } +} + + +int64 Net::Impl::getPerfProfile(std::vector& timings) const +{ + timings = std::vector(layersTimings.begin() + 1, layersTimings.end()); + int64 total = (int64)std::accumulate(timings.begin(), timings.end(), 0.0); + return total; +} + +void Net::Impl::getMemoryConsumption( + const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) /*const*/ +{ + layerIds.clear(); + weights.clear(); + blobs.clear(); + + std::vector> inLayerShapes, outLayerShapes; + + getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); + // FIXIT netWasQuantized check is not enough - per layer check should be done + size_t elemSize = netWasQuantized ? sizeof(char) : sizeof(float); + for (int i = 0; i < layerIds.size(); i++) + { + int w = 0, b = 0; + Impl::MapIdToLayerData::const_iterator layer = layers.find(layerIds[i]); + CV_Assert(layer != layers.end()); + + for (int j = 0; j < layer->second.params.blobs.size(); j++) + { + const Mat& weightsBlob = layer->second.params.blobs[j]; + w += weightsBlob.total() * weightsBlob.elemSize(); + } + + for (int j = 0; j < outLayerShapes[i].size(); j++) + { + b += total(outLayerShapes[i][j]) * elemSize; + } + + weights.push_back(w); + blobs.push_back(b); + } +} + + +// TODO drop? +void Net::Impl::getLayerTypes(std::vector& layersTypes) const +{ + layersTypes.clear(); + + std::map layers_type_map; + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + if (layers_type_map.find(it->second.type) == layers_type_map.end()) + layers_type_map[it->second.type] = 0; + layers_type_map[it->second.type]++; + } + + for (std::map::const_iterator it = layers_type_map.begin(); it != layers_type_map.end(); it++) + { + layersTypes.push_back(it->first); + } +} + + +// TODO drop? +int Net::Impl::getLayersCount(const String& layerType) const +{ + int count = 0; + for (Impl::MapIdToLayerData::const_iterator it = layers.begin(); + it != layers.end(); it++) + { + if (it->second.type == layerType) + count++; + } + return count; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp new file mode 100644 index 0000000000..9dc96fe82d --- /dev/null +++ b/modules/dnn/src/net_impl.hpp @@ -0,0 +1,270 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_SRC_NET_IMPL_HPP__ +#define __OPENCV_DNN_SRC_NET_IMPL_HPP__ + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" +#include "op_timvx.hpp" + +#include +#include +#include + +#include + +#include + +#include "layer_internals.hpp" // LayerPin LayerData DataLayer + +#include "legacy_backend.hpp" // wrapMat BlobManager OpenCLBackendWrapper + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +using std::make_pair; +using std::string; + +// NB: Implementation is divided between of multiple .cpp files +struct Net::Impl : public detail::NetImplBase +{ + typedef std::map LayersShapesMap; + typedef std::map MapIdToLayerData; + + Impl(); + + Ptr netInputLayer; + std::vector blobsToKeep; + MapIdToLayerData layers; + std::map layerNameToId; + std::map outputNameToId; // use registerOutput() to populate outputs + BlobManager blobManager; + int preferableBackend; + int preferableTarget; + String halideConfigFile; + bool skipInfEngineInit; + bool hasDynamicShapes; + // Map host data to backend specific wrapper. + std::map> backendWrappers; + + int lastLayerId; + + bool netWasAllocated; + bool netWasQuantized; + bool fusion; + bool isAsync; + std::vector layersTimings; + + + bool empty() const; + void setPreferableBackend(int backendId); + void setPreferableTarget(int targetId); + + // FIXIT use inheritance + Ptr wrap(Mat& host); + + + void clear(); + + void setUpNet(const std::vector& blobsToKeep_ = std::vector()); + + + Ptr getLayer(int layerId) const; + Ptr getLayer(const LayerId& layerId) const; + + int getLayerId(const String& layerName) const; + + int getLayerId(int id) const; + + int getLayerId(DictValue& layerDesc) const; + + String getLayerName(int id) const; + + LayerData& getLayerData(int id) const; + + LayerData& getLayerData(const String& layerName) const; + + LayerData& getLayerData(const DictValue& layerDesc) const; + + static void addLayerInput(LayerData& ld, int inNum, LayerPin from); + + int resolvePinOutputName(LayerData& ld, const String& outName) const; + + LayerPin getPinByAlias(const String& layerName) const; + + std::vector getLayerOutPins(const String& layerName) const; + + // FIXIT remove dtype + int addLayer(const String& name, const String& type, const int& dtype, LayerParams& params); + + int addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params); + + + void connect(int outLayerId, int outNum, int inLayerId, int inNum); + + int registerOutput(const std::string& outputName, int layerId, int outputPort); + + // FIXIT drop "unconnected" API + std::vector getUnconnectedOutLayers() const; + std::vector getUnconnectedOutLayersNames() /*const*/; + + + void setInputsNames(const std::vector& inputBlobNames); + void setInputShape(const String& inputName, const MatShape& shape); + void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean); + Mat getParam(int layer, int numParam) const; + void setParam(int layer, int numParam, const Mat& blob); + std::vector> getLayerInputs(int layerId) const; + std::vector getLayerNames() const; + + + // TODO drop? + void getLayerTypes(std::vector& layersTypes) const; + int getLayersCount(const String& layerType) const; + + + // FIXIT use inheritance + void initBackend(const std::vector& blobsToKeep_); + + void setHalideScheduler(const String& scheduler); +#ifdef HAVE_HALIDE + void compileHalide(); + void initHalideBackend(); +#endif + +#ifdef HAVE_DNN_NGRAPH + void addNgraphOutputs(LayerData& ld); + void initNgraphBackend(const std::vector& blobsToKeep_); +#endif + +#ifdef HAVE_WEBNN + void addWebnnOutputs(LayerData& ld); + void initWebnnBackend(const std::vector& blobsToKeep_); +#endif + +#ifdef HAVE_VULKAN + void initVkComBackend(); +#endif + +#ifdef HAVE_TIMVX + // Create timVxInfo for reserve tvGraphList. + TimVXInfo timVxInfo = TimVXInfo(); + void tvUpdateConfictMap(int graphIndex, LayerData& ld, std::vector >& graphConflictMap); + void tvConvertToOutputNode(const LayerData& ld, Ptr& targetWrap); + void initTimVXBackend(); +#endif + +#ifdef HAVE_CUDA + struct CudaInfo_t + { + CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_) + : context(std::move(ctxt)) + , d2h_stream(std::move(d2h_stream_)) + {} + cuda4dnn::csl::CSLContext context; + cuda4dnn::csl::Stream d2h_stream; + cuda4dnn::csl::Workspace workspace; + }; + + std::unique_ptr cudaInfo; + + void initCUDABackend(const std::vector& blobsToKeep_); +#endif + + void allocateLayer(int lid, const LayersShapesMap& layersShapes); + + // TODO add getter + void enableFusion(bool fusion_); + + void fuseLayers(const std::vector& blobsToKeep_); + + void allocateLayers(const std::vector& blobsToKeep_); + + void forwardLayer(LayerData& ld); + + void forwardToLayer(LayerData& ld, bool clearFlags = true); + + Mat forward(const String& outputName); + AsyncArray forwardAsync(const String& outputName); + void forward(OutputArrayOfArrays outputBlobs, const String& outputName); + void forward(OutputArrayOfArrays outputBlobs, + const std::vector& outBlobNames); + void forward(std::vector>& outputBlobs, + const std::vector& outBlobNames); + + + void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes); + + void getLayersShapes( + const ShapesVec& netInputShapes, + std::vector& layersIds, + std::vector& inLayersShapes, + std::vector& outLayersShapes) /*const*/; + + void getLayersShapes(const ShapesVec& netInputShapes, + LayersShapesMap& inOutShapes); + + void getLayerShapes(const ShapesVec& netInputShapes, + const int layerId, + LayerShapes& shapes); + + void updateLayersShapes(); + + int64 getFLOPS(const std::vector& netInputShapes) /*const*/; + int64 getFLOPS( + const int layerId, + const std::vector& netInputShapes) /*const*/; + + void getMemoryConsumption( + const int layerId, + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/; + void getMemoryConsumption( + const std::vector& netInputShapes, + size_t& weights, size_t& blobs) /*const*/; + void getMemoryConsumption( + const std::vector& netInputShapes, + std::vector& layerIds, std::vector& weights, + std::vector& blobs) /*const*/; + int64 getPerfProfile(std::vector& timings) const; + + // TODO drop + LayerPin getLatestLayerPin(const std::vector& pins) const; + + Mat getBlob(const LayerPin& pin) const; + + Mat getBlob(String outputName) const; + +#ifdef CV_CXX11 + AsyncArray getBlobAsync(const LayerPin& pin); + + AsyncArray getBlobAsync(String outputName); +#endif // CV_CXX11 + +#ifdef HAVE_INF_ENGINE + static + Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); +#endif + + string dump(bool forceAllocation = false) const; + + void dumpNetworkToFile() const; + + // FIXIT drop from inference API + Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) /*const*/; + void getInputDetails(std::vector& scales, std::vector& zeropoints) /*const*/; + void getOutputDetails(std::vector& scales, std::vector& zeropoints) /*const*/; + +}; // Net::Impl + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // __OPENCV_DNN_SRC_NET_IMPL_HPP__ diff --git a/modules/dnn/src/net_impl_backend.cpp b/modules/dnn/src/net_impl_backend.cpp new file mode 100644 index 0000000000..e26126d86c --- /dev/null +++ b/modules/dnn/src/net_impl_backend.cpp @@ -0,0 +1,214 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" +#include "legacy_backend.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +Ptr Net::Impl::wrap(Mat& host) +{ + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU) + return Ptr(); + + MatShape shape(host.dims); + for (int i = 0; i < host.dims; ++i) + shape[i] = host.size[i]; + + void* data = host.data; + if (backendWrappers.find(data) != backendWrappers.end()) + { + Ptr baseBuffer = backendWrappers[data]; + if (preferableBackend == DNN_BACKEND_OPENCV) + { +#ifdef HAVE_OPENCL + CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget)); + return OpenCLBackendWrapper::create(baseBuffer, host); +#else + CV_Error(Error::StsInternal, ""); +#endif + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { + CV_Assert(haveHalide()); +#ifdef HAVE_HALIDE + return Ptr(new HalideBackendWrapper(baseBuffer, shape)); +#endif + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019; + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + return wrapMat(preferableBackend, preferableTarget, host); + } + else if (preferableBackend == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + return wrapMat(preferableBackend, preferableTarget, host); +#endif + } + else if (preferableBackend == DNN_BACKEND_VKCOM) + { +#ifdef HAVE_VULKAN + return Ptr(new VkComBackendWrapper(baseBuffer, host)); +#endif + } + else if (preferableBackend == DNN_BACKEND_CUDA) + { + CV_Assert(haveCUDA()); +#ifdef HAVE_CUDA + switch (preferableTarget) + { + case DNN_TARGET_CUDA: + return CUDABackendWrapperFP32::create(baseBuffer, shape); + case DNN_TARGET_CUDA_FP16: + return CUDABackendWrapperFP16::create(baseBuffer, shape); + default: + CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); + } +#endif + } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + return Ptr(new TimVXBackendWrapper(baseBuffer, host)); +#endif + } + else + CV_Error(Error::StsNotImplemented, "Unknown backend identifier"); + } + + Ptr wrapper = wrapMat(preferableBackend, preferableTarget, host); + backendWrappers[data] = wrapper; + return wrapper; +} + + +void Net::Impl::initBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + if (preferableBackend == DNN_BACKEND_OPENCV) + { + CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget)); + } + else if (preferableBackend == DNN_BACKEND_HALIDE) + { +#ifdef HAVE_HALIDE + initHalideBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Halide"); +#endif + } + else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { +#ifdef HAVE_DNN_NGRAPH + initNgraphBackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO"); +#endif + } + else if (preferableBackend == DNN_BACKEND_WEBNN) + { +#ifdef HAVE_WEBNN + initWebnnBackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN"); +#endif + } + else if (preferableBackend == DNN_BACKEND_VKCOM) + { +#ifdef HAVE_VULKAN + initVkComBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Vulkan"); +#endif + } + else if (preferableBackend == DNN_BACKEND_CUDA) + { +#ifdef HAVE_CUDA + initCUDABackend(blobsToKeep_); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of CUDA/CUDNN"); +#endif + } + else if (preferableBackend == DNN_BACKEND_TIMVX) + { +#ifdef HAVE_TIMVX + initTimVXBackend(); +#else + CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of TimVX"); +#endif + } + else + { + CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend)); + } +} + + +void Net::Impl::setPreferableBackend(int backendId) +{ + if (backendId == DNN_BACKEND_DEFAULT) + backendId = (Backend)getParam_DNN_BACKEND_DEFAULT(); + + if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX) + { + CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks"); + backendId = DNN_BACKEND_OPENCV; + } + +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE) + backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; +#endif + + if (preferableBackend != backendId) + { + preferableBackend = backendId; + clear(); + } +} + +void Net::Impl::setPreferableTarget(int targetId) +{ + if (netWasQuantized && targetId != DNN_TARGET_CPU && + targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16 && targetId != DNN_TARGET_NPU) + { + CV_LOG_WARNING(NULL, "DNN: Only CPU, OpenCL/OpenCL FP16 and NPU targets are supported by quantized networks"); + targetId = DNN_TARGET_CPU; + } + + if (preferableTarget != targetId) + { + preferableTarget = targetId; + if (IS_DNN_OPENCL_TARGET(targetId)) + { +#ifndef HAVE_OPENCL +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_OPENCV) +#else + if (preferableBackend == DNN_BACKEND_DEFAULT || + preferableBackend == DNN_BACKEND_OPENCV) +#endif // HAVE_INF_ENGINE + preferableTarget = DNN_TARGET_CPU; +#else + bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16"); + if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16) + preferableTarget = DNN_TARGET_OPENCL; +#endif + } + clear(); + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp new file mode 100644 index 0000000000..753c00de90 --- /dev/null +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -0,0 +1,608 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +#ifdef HAVE_CUDA +#include "cuda4dnn/primitives/eltwise.hpp" // required by fuseLayers +#endif + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::enableFusion(bool fusion_) +{ + if (fusion != fusion_) + { + fusion = fusion_; + clear(); + } +} + + +#if 0 +#define printf_(args) printf args +#else +#define printf_(args) +#endif + + +void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + + if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV && + preferableBackend != DNN_BACKEND_CUDA && + preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && + preferableBackend != DNN_BACKEND_TIMVX)) + return; + +#if 0 // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + return; +#endif + + // scan through all the layers. If there is convolution layer followed by the activation layer, + // we try to embed this activation into the convolution and disable separate execution of the activation + + // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)" + std::set pinsToKeep(blobsToKeep_.begin(), + blobsToKeep_.end()); + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++) + { + int lid = it->first; + LayerData& ld = layers[lid]; + if (ld.skip) + { + printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); + continue; + } + printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str())); + + // the optimization #1. try to fuse batch norm, scaling and/or activation layers + // with the current layer if they follow it. Normally, the are fused with the convolution layer, + // but some of them (like activation) may be fused with fully-connected, elemwise (+) and + // some other layers. + Ptr& currLayer = ld.layerInstance; + if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0) + { + LayerData* nextData = &layers[ld.consumers[0].lid]; + LayerPin lpNext(ld.consumers[0].lid, 0); + while (nextData) + { +#ifdef HAVE_INF_ENGINE + if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0) + { + CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type); + break; + } +#endif + /* we use `tryFuse` member of convolution layer to fuse eltwise later + * it's not intended to be fused here; hence, we stop when we encounter eltwise + */ + if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise") + break; + Ptr nextLayer = nextData->layerInstance; + if (currLayer->tryFuse(nextLayer)) + { + printf_(("\tfused with %s\n", nextLayer->name.c_str())); + nextData->skip = true; + ld.outputBlobs = layers[lpNext.lid].outputBlobs; + ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; + if (nextData->consumers.size() == 1) + { + int nextLayerId = nextData->consumers[0].lid; + nextData = &layers[nextLayerId]; + lpNext = LayerPin(nextLayerId, 0); + } + else + { + nextData = 0; + break; + } + } + else + break; + } + + if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) + continue; // Go to the next layer. + + // TODO: OpenCL target support more fusion styles. + if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) && + (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" && + ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" && + ld.layerInstance->type != "Concat")) ) + continue; + + if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget) + && ld.layerInstance->type != "Convolution" + && ld.layerInstance->type != "Concat") + continue; + + while (nextData) + { + // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh + if (IS_DNN_OPENCL_TARGET(preferableTarget) && + nextData->type != "ReLU" && + nextData->type != "ChannelsPReLU" && + nextData->type != "ReLU6" && + nextData->type != "TanH" && + nextData->type != "Power") + break; + + Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); + if (nextActivLayer.empty()) + break; + + if (currLayer->setActivation(nextActivLayer)) + { + printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); + nextData->skip = true; + ld.outputBlobs = layers[lpNext.lid].outputBlobs; + ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers; + if (nextData->consumers.size() == 1) + { + int nextLayerId = nextData->consumers[0].lid; + nextData = &layers[nextLayerId]; + lpNext = LayerPin(nextLayerId, 0); + } + else + { + nextData = 0; + break; + } + } + else + break; + } + + // OpenCL: fuse convolution layer followed by eltwise + relu + // CUDA: fuse convolution layer followed by eltwise (and optional activation) + while (nextData && + (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && + ld.layerInstance->type == "Convolution" + ) // semantic of 'if' + { + Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); + if (nextEltwiseLayer.empty()) + break; + +#ifdef HAVE_CUDA + // CUDA backend supports fusion with eltwise sum (without variable channels) + if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) + { + // we create a temporary backend node for eltwise layer to obtain the eltwise configuration + cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init + const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers); + auto eltwiseNode = node.dynamicCast(); + + // broadcasting not supported in fused ops + auto required_shape = shape(nextData->outputBlobs[0]); + for (int i = 0; i < nextData->inputBlobs.size(); i++) + { + if (shape(*nextData->inputBlobs[i]) != required_shape) + { + eltwiseNode.reset(); + break; + } + } + + // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used. + // Hence, a successful cast to EltwiseOp implies that the number of channels is same in all operand tensors. + if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty()) + break; + } +#endif + + if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0) + break; + if (nextData->inputBlobsId.size() != 2) + break; + + if (IS_DNN_OPENCL_TARGET(preferableTarget)) + { + if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) == "sum") + { + if (nextData->params.has("coeff")) + { + DictValue paramCoeff = nextData->params.get("coeff"); + int n = paramCoeff.size(); + bool isCoeffOneOne = (n == 2); + for (int i = 0; isCoeffOneOne && i < n; i++) + { + float c = paramCoeff.get(i); + isCoeffOneOne &= (c == 1.0f); + } + if (!isCoeffOneOne) + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only"); + break; + } + } + } + else + { + CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get("operation")); + break; + } + } + + { + LayerData *eltwiseData = nextData; + + // Eltwise layer has two inputs. We need to determine which + // is a base convolution layer and which could be used as it's bias. + LayerData* biasLayerData = 0; + for (int i = 0; i < 2; ++i) + { + LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid]; + CV_Assert(downLayerData); + while (downLayerData->skip) + { + if (downLayerData->inputBlobsId.size() == 1) + downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; + else + { + downLayerData = 0; + break; + } + } + if (downLayerData && ld.id == downLayerData->id) + { + biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid]; + break; + } + } + CV_Assert(biasLayerData); + { + // fuse eltwise + activation layer + // bias must already be computed to fuse => bias layer must appear before convolution + if (biasLayerData->id < ld.id) + { + /* we can fuse activation if: + * => activation layer that follows is the only consumer of eltwise output + * => activation layer does not process multiple inputs + * => we do not require to keep the output of eltwise + */ + Ptr nextFusabeleActivLayer; + if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0) + { + nextData = &layers[eltwiseData->consumers[0].lid]; + lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); + CV_Assert(nextData); + if (nextData->outputBlobs.size() == 1) + nextFusabeleActivLayer = nextData->layerInstance.dynamicCast(); + } + else + { + // OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise + nextData = 0; + } + + // the requirements of OCV OpenCL backend and CUDA backend are different + // we need to check them separately; hence, the fuse variables + bool fuse_eltwise = false, fuse_activation = false; + + Ptr activ_power; + if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() && + nextData && + (!nextData->type.compare("ReLU") || + !nextData->type.compare("ChannelsPReLU") || + (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast()) && activ_power->scale == 1.0f) + ) && + currLayer->setActivation(nextFusabeleActivLayer)) + { + fuse_eltwise = true; + fuse_activation = true; + } + + if (IS_DNN_CUDA_TARGET(preferableTarget)) + { + /* supported fusion options: + * => convolution + eltwise + * => activation(convolution) + eltwise + * > convolution + activation would have been fused already; we have to fuse eltwise + * => activation(convolution + eltwise) + * > fuse eltwise and then activation + */ + auto layer = nextEltwiseLayer.staticCast(); + if (currLayer->tryFuse(layer)) + { + fuse_eltwise = true; /* eltwise was successfully fused */ + if (!nextFusabeleActivLayer.empty() && nextData) + { + if ((!nextData->type.compare("ReLU") || + !nextData->type.compare("ReLU6") || + !nextData->type.compare("Power") || + !nextData->type.compare("TanH") || + !nextData->type.compare("Sigmoid") || + !nextData->type.compare("Swish") || + !nextData->type.compare("Mish")) && + currLayer->setActivation(nextFusabeleActivLayer)) + { + // activation was fused + fuse_activation = true; + } + } + } + } + + CV_Assert(!fuse_activation || fuse_eltwise); /* cannot fuse activation without eltwise */ + if(fuse_eltwise && fuse_activation) + { + CV_Assert(nextData); + CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); + ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str())); + eltwiseData->skip = true; + nextData->skip = true; + // This optimization for cases like + // some_layer conv + // | | + // +-- eltwise --+ + // | + // activ + // This way all the element-wise computations + // (i.e. some_layer+conv or some_layer*conv) + // would be done at [conv] layer. So we need to + // replace [conv]'s output blob to [eltwise]'s one + // considering that [activ] is an in-place layer. + // Also we need to move all the consumers' references. + // To prevent memory collisions (i.e. when input of + // [conv] and output of [eltwise] is the same blob) + // we allocate a new blob. + CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); + ld.outputBlobs[0] = ld.outputBlobs[0].clone(); + ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); + + eltwiseData->outputBlobs = ld.outputBlobs; + nextData->outputBlobs = ld.outputBlobs; + eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; + nextData->outputBlobsWrappers = ld.outputBlobsWrappers; + + // Move references of [activ] layer consumers to the newly allocated blob. + for (int i = 0; i < nextData->consumers.size(); ++i) + { + LayerData& consumer = layers[nextData->consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); ++j) + { + if (consumer.inputBlobsId[j].lid == lpNext.lid) + { + consumer.inputBlobs[j] = &ld.outputBlobs[0]; + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } + else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise) + { + CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); + CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); + ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + eltwiseData->skip = true; + // This optimization is for cases like + // some_layer conv (maybe fused with activ) + // | | + // +-- eltwise --+ + // + // This way all the element-wise computations + // (i.e. some_layer+conv or some_layer*conv) + // would be done at [conv] layer. So we need to + // replace [conv]'s output blob to [eltwise]'s one. + // Also we need to move all the consumers' references. + // To prevent memory collisions (i.e. when input of + // [conv] and output of [eltwise] is the same blob) + // we allocate a new blob. + CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1); + ld.outputBlobs[0] = ld.outputBlobs[0].clone(); + ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]); + + eltwiseData->outputBlobs = ld.outputBlobs; + eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers; + + // Move references of [eltwise] layer consumers to the newly allocated blob. + for (int i = 0; i < eltwiseData->consumers.size(); ++i) + { + LayerData& consumer = layers[eltwiseData->consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); ++j) + { + if (consumer.inputBlobsId[j].lid == eltwiseData->id) + { + consumer.inputBlobs[j] = &ld.outputBlobs[0]; + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } + } + } + } + + break; + } + } + + if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA) + continue; // Go to the next layer. + + // the optimization #2. if there is concat layer that concatenates channels + // from the inputs together (i.e. axis == 1) then we make the inputs of + // the concat layer to write to the concatenation output buffer + // (and so we eliminate the concatenation layer, because the channels + // are concatenated implicitly). + Ptr concatLayer = ld.layerInstance.dynamicCast(); + if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 ) + { + Mat& output = ld.outputBlobs[0]; + UMat umat_output; +#ifdef HAVE_OPENCL + if (!ld.outputBlobsWrappers.empty() && + (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))) + { + size_t i, ninputs = ld.inputBlobsId.size(); + bool conv_layer = true; + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = ld.inputBlobsId[i]; + LayerData* inp_i_data = &layers[pin.lid]; + while(inp_i_data->skip && + inp_i_data->inputBlobsId.size() == 1 && + inp_i_data->consumers.size() == 1) + { + pin = inp_i_data->inputBlobsId[0]; + inp_i_data = &layers[pin.lid]; + } + conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution"); + } + if (!conv_layer) + continue; + std::vector umat_outputBlobs; + umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers); + umat_output = umat_outputBlobs[0]; + } +#endif + + // TODO: in general, this optimization can always be done, but + // many layers currently check that the input/output blobs are + // continuous arrays. Unfortunately, this is not true when + // the concatenation optimization is applied with batch_size > 1. + // so, for now, we only apply this optimization in the most popular + // case batch_size == 1. + int axis = normalize_axis(concatLayer->axis, output.dims); + if( output.total(0, axis) == 1 ) + { + size_t i, ninputs = ld.inputBlobsId.size(); + std::vector realinputs(ninputs); + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = ld.inputBlobsId[i]; + LayerData* inp_i_data = &layers[pin.lid]; + while(inp_i_data->skip && + inp_i_data->inputBlobsId.size() == 1 && + inp_i_data->consumers.size() == 1) + { + pin = inp_i_data->inputBlobsId[0]; + inp_i_data = &layers[pin.lid]; + } + printf_(("\treal input for %s is %s\n", + layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(), + inp_i_data->getLayerInstance()->name.c_str())); + + if(inp_i_data->skip || inp_i_data->consumers.size() != 1) + break; +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA && + (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false || + (inp_i_data->layerInstance->type != "Convolution" && + inp_i_data->layerInstance->type != "Pooling" && + inp_i_data->layerInstance->type != "Resize" && + inp_i_data->layerInstance->type != "Flatten" && + inp_i_data->layerInstance->type != "Permute" && + inp_i_data->layerInstance->type != "Reorg" && + inp_i_data->layerInstance->type != "Eltwise" && + inp_i_data->layerInstance.dynamicCast().empty()))) + { + break; + } +#endif + realinputs[i] = pin; + } + + if( i >= ninputs ) + { + // Allocate new memory to prevent collisions during memory + // reusing (see https://github.com/opencv/opencv/pull/10456). + output = output.clone(); +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && + IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umats(1); + umat_output = umat_output.clone(); + umats[0] = umat_output; + OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats); + } +#endif + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + ld.outputBlobsWrappers[0] = wrap(output); +#endif + std::vector chrange(output.dims, Range::all()); + int ofs = 0; + for( i = 0; i < ninputs; i++ ) + { + LayerPin pin = realinputs[i]; + LayerData* inp_i_data = &layers[pin.lid]; + int channels_i = ld.inputBlobs[i]->size[axis]; + chrange[axis] = Range(ofs, ofs + channels_i); + printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(), + pin.oid, ofs, ofs + channels_i)); + ofs += channels_i; + Mat output_slice = output(chrange); + Mat& curr_output = inp_i_data->outputBlobs[pin.oid]; + CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size); + Mat* oldPtr = &curr_output; + curr_output = output_slice; +#ifdef HAVE_OPENCL + if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)) + { + std::vector umats(inp_i_data->outputBlobsWrappers.size()); + umats[pin.oid] = umat_output(chrange); + OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats); + } +#endif +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + { + auto cuda_wrapper = wrap(output).dynamicCast(); + auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims); + auto new_shape = shape(output_slice); + cuda_wrapper->update(new_shape, offset); + inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast(); + } +#endif + // Layers that refer old input Mat will refer to the + // new data but the same Mat object. + CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output); + } + +#ifdef HAVE_CUDA + if (preferableBackend == DNN_BACKEND_CUDA) + { + for (int i = 0; i < ld.consumers.size(); i++) + { + LayerData& consumer = layers[ld.consumers[i].lid]; + for (int j = 0; j < consumer.inputBlobsId.size(); j++) + { + if (consumer.inputBlobsId[j].lid == ld.id) + { + CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data); + consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0]; + break; + } + } + } + } +#endif + ld.skip = true; + printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str())); + } + } + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp new file mode 100644 index 0000000000..a546b0237d --- /dev/null +++ b/modules/dnn/src/net_openvino.cpp @@ -0,0 +1,568 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + +#include +#include + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +#ifdef HAVE_INF_ENGINE + + +/** mark input pins as outputs from other subnetworks + * FIXIT must be done by DNN engine not ngraph. + */ +void Net::Impl::addNgraphOutputs(LayerData& ld) +{ + CV_TRACE_FUNCTION(); + + CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type); + + Ptr layerNet; + auto it = ld.backendNodes.find(preferableBackend); + if (it != ld.backendNodes.end()) + { + Ptr node = it->second; + if (!node.empty()) + { + Ptr ieNode = node.dynamicCast(); + CV_Assert(!ieNode.empty()); + CV_Assert(!ieNode->net.empty()); + layerNet = ieNode->net; + } + } + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieInpNode = inpNode.dynamicCast(); + CV_Assert(!ieInpNode.empty()); + CV_Assert(!ieInpNode->net.empty()); + if (layerNet != ieInpNode->net) + { + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + ieInpNode->net->addOutput(ieInpNode); + } + } + } +} + +void Net::Impl::initNgraphBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, ""); + + Ptr net; + + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + if (ld.id == 0) + { + CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || + (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; + outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; + dataPtr->setName(outputName); + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; + dataPtr->setName(outputName); + } + } + } + + if (skipInfEngineInit) + { + Ptr node = layers[lastLayerId].backendNodes[preferableBackend]; + CV_Assert(!node.empty()); + + Ptr ieNode = node.dynamicCast(); + CV_Assert(!ieNode.empty()); + + CV_Assert(ieNode->net); + InfEngineNgraphNet& ienet = *ieNode->net; + ienet.reset(); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + if (ld.id == 0) + { + for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]); + dataPtr->setName(netInputLayer->outNames[i]); + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + auto it = ienet.outputsDesc.find(ld.name); + if (it != ienet.outputsDesc.end()) + { + const InferenceEngine::TensorDesc& descriptor = it->second; + InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name); + dataPtr->setName(ld.name); + } + else + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + dataPtr->setName(ld.name); + } + } + } + ienet.addBlobs(ld.inputBlobsWrappers); + ienet.addBlobs(ld.outputBlobsWrappers); + ld.skip = true; + } + layers[lastLayerId].skip = false; + ienet.init((Target)preferableTarget); + return; + } + + bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || + openvino::checkTarget(DNN_TARGET_CPU)); + + // Build Inference Engine networks from sets of layers that support this + // backend. Split a whole model on several Inference Engine networks if + // some of layers are not implemented. + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + + CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ..."); + + if (ld.id == 0 && ld.skip) + { + CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!"); + continue; + } + + bool fused = ld.skip; + Ptr layer = ld.layerInstance; + if (!fused && !layer->supportBackend(preferableBackend)) + { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); + bool customizable = ld.id != 0 && supportsCPUFallback; + + // TODO: there is a bug in Myriad plugin with custom layers shape infer. + if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL) + { + for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i) + { + customizable = ld.inputBlobs[i]->size[0] == 1; + } + } + + // TODO: fix these workarounds + if (preferableTarget == DNN_TARGET_MYRIAD || + preferableTarget == DNN_TARGET_HDDL || + preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16) + customizable &= ld.type != "Concat"; + + if (preferableTarget == DNN_TARGET_OPENCL || + preferableTarget == DNN_TARGET_OPENCL_FP16) + customizable &= ld.type != "Power"; + + if (preferableTarget == DNN_TARGET_OPENCL) + customizable &= ld.type != "Eltwise"; + + if (!customizable) + { + CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!"); + addNgraphOutputs(ld); + net = Ptr(); + layer->preferableTarget = DNN_TARGET_CPU; + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieNode = inpNode.dynamicCast(); + CV_Assert(!ieNode.empty()); + ieNode->net->addOutput(ieNode); + } + } + continue; + } + } + ld.skip = true; // Initially skip all Inference Engine supported layers. + + // Create a new network if one of inputs from different Inference Engine graph. + std::vector> inputNodes; + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) + if (inputNodes.size() == ld.inputBlobsId.size()) + { + break; + } + LayerData& inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr ieInpNode = inpNode.dynamicCast(); + CV_Assert(!ieInpNode.empty()); + CV_Assert(!ieInpNode->net.empty()); + if (ieInpNode->net == net && !fused) + { + inputNodes.push_back(inpNode); + continue; + } + } + + if (net.empty()) + { + net = Ptr(new InfEngineNgraphNet(*this)); + } + + if (!fused) + { + std::vector inputNames; + std::vector inputs; + + auto curr_pos = inpLd.consumers.begin(); + auto compare = [&ld](const LayerPin& lp) { return lp.lid == ld.id; }; + auto cons = curr_pos; + while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != + inpLd.consumers.end()) { + int cons_inp = cons->oid; + Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. + dynamicCast(); + CV_Assert(!inpWrapper.empty()); + auto iter = std::find(inputNames.begin(), inputNames.end(), + inpWrapper->dataPtr->getName()); + if (iter == inputNames.end()) + { + inputNames.push_back(inpWrapper->dataPtr->getName()); + inputs.push_back(inpLd.outputBlobs[cons_inp]); + } + curr_pos = cons + 1; + } + + auto inps = net->setInputs(inputs, inputNames); + for (auto& inp : inps) + { + inputNodes.emplace_back(Ptr(new InfEngineNgraphNode(inp))); + } + } + } + + Ptr node; + if (!net.empty()) + { + if (fused) + { + bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && + ld.inputBlobs[0]->data == ld.outputBlobs[0].data; + CV_Assert(inPlace); + node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; + ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; + } + } + else + { + net = Ptr(new InfEngineNgraphNet(*this)); + } + + if (!fused) + { + CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + int lid = ld.inputBlobsId[i].lid; + int oid = ld.inputBlobsId[i].oid; + if (oid == 0 || lid == 0) + continue; + + auto ieInpNode = inputNodes[i].dynamicCast(); + const auto& ngraph_input_node = ieInpNode->node; + CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); + + // Handle parameters from other subnets. Output port is not used in this case + if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && + ngraph_input_node->get_output_size() == 1) + { + inputNodes[i] = Ptr(new InfEngineNgraphNode(ngraph_input_node)); + continue; + } + CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) + // FIXIT refactor ".initNgraph()" API to use Output + // WA: use Concat to emulate Identity operation with requested output port + auto oid_node = std::make_shared(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0); + inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); +#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) + inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); +#else + inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false))); +#endif + } + + if (layer->supportBackend(preferableBackend)) + { + CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size()); + node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes); +#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node) + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + node.dynamicCast()->setName(dataPtr->getName()); + } +#else + node.dynamicCast()->setName(layer->name); +#endif + } + else + { + CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type); + node = Ptr(new InfEngineNgraphNode(inputNodes, + ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals)); + } + } + else if (node.empty()) + { + CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass..."); + continue; + } + + ld.backendNodes[preferableBackend] = node; + + Ptr ieNode = node.dynamicCast(); + CV_Assert(!ieNode.empty()); + ieNode->net = net; + + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + ieNode->net->addOutput(ieNode); + break; + } + } + ieNode->net->setNodePtr(&ieNode->node); + + net->addBlobs(ld.inputBlobsWrappers); + net->addBlobs(ld.outputBlobsWrappers); + addNgraphOutputs(ld); + } + + // Initialize all networks. + for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData& ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr ieNode = node.dynamicCast(); + if (ieNode.empty()) + continue; + + CV_Assert(!ieNode->net.empty()); + + if (!ieNode->net->isInitialized()) + { + ieNode->net->addOutput(ieNode); + ieNode->net->createNet((Target)preferableTarget); + ld.skip = false; + } + } +} + +//} // Net::Impl + +/*static*/ +Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) +{ + CV_TRACE_FUNCTION(); + + CV_TRACE_REGION("register_inputs"); + + std::vector inputsNames; + std::vector inp_shapes; + for (auto& it : ieNet.getInputsInfo()) + { + inputsNames.push_back(it.first); + std::vector dims = it.second->getTensorDesc().getDims(); + inp_shapes.push_back(std::vector(dims.begin(), dims.end())); + } + + Net cvNet; + cvNet.setInputsNames(inputsNames); + + // set empty input to determine input shapes + for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id) + { + cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]); + } + + CV_TRACE_REGION_NEXT("backendNode"); + + Ptr backendNode; + { + auto fake_node = std::make_shared(ngraph::element::f32, ngraph::Shape {}); + Ptr backendNodeNGraph(new InfEngineNgraphNode(fake_node)); + backendNodeNGraph->net = Ptr(new InfEngineNgraphNet(*(cvNet.impl), ieNet)); + backendNode = backendNodeNGraph; + } + + CV_TRACE_REGION_NEXT("register_outputs"); + + auto ngraphFunction = ieNet.getFunction(); + CV_Assert(ngraphFunction); + std::vector> ngraphOperations = ngraphFunction->get_ops(); + + for (auto& it : ieNet.getOutputsInfo()) + { + CV_TRACE_REGION("output"); + const auto& outputName = it.first; + + LayerParams lp; + int lid = cvNet.addLayer(it.first, "", lp); + + LayerData& ld = cvNet.impl->layers[lid]; + + { + Ptr cvLayer(new NgraphBackendLayer(ieNet)); + cvLayer->name = outputName; + cvLayer->type = "_unknown_"; + + auto process_layer = [&](const std::string& name) -> bool + { + CV_TRACE_REGION("ngraph_function"); + for (const auto& op : ngraphOperations) + { + CV_Assert(op); + if (op->get_friendly_name() == name) + { + const std::string typeName = op->get_type_info().name; + cvLayer->type = typeName; + return true; + } + } + return false; + }; + + bool found = process_layer(outputName); + if (!found) + { + auto pos = outputName.rfind('.'); // cut port number: ".0" + if (pos != std::string::npos) + { + std::string layerName = outputName.substr(0, pos); + found = process_layer(layerName); + } + } + if (!found) + CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'"); + + ld.layerInstance = cvLayer; + ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode; + } + + for (int i = 0; i < inputsNames.size(); ++i) + cvNet.connect(0, i, lid, i); + } + + CV_TRACE_REGION_NEXT("finalize"); + + cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + + cvNet.impl->skipInfEngineInit = true; + return cvNet; +} +#endif // HAVE_INF_ENGINE + +Net Net::readFromModelOptimizer(const String& xml, const String& bin) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(xml); CV_UNUSED(bin); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + InferenceEngine::Core& ie = getCore(""); + InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); + + return Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + +Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!bufferModelConfig.empty()); + CV_Assert(!bufferWeights.empty()); + return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), + bufferWeights.data(), bufferWeights.size()); +} + +Net Net::readFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); + CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + + FPDenormalsIgnoreHintScope fp_denormals_ignore_scope; + + InferenceEngine::Core& ie = getCore(""); + + std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::CNNNetwork ieNet; + try + { + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); + + ieNet = ie.ReadNetwork(model, weights_blob); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } + + return Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/net_quantization.cpp b/modules/dnn/src/net_quantization.cpp new file mode 100644 index 0000000000..ef1857a8e2 --- /dev/null +++ b/modules/dnn/src/net_quantization.cpp @@ -0,0 +1,298 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "net_impl.hpp" + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +// FIXIT drop from inference API +static +void getQuantizationParams(const Mat& src, std::vector& scales, std::vector& zeropoints) +{ + const int qmin = -128; // INT8_MIN + const int qmax = 127; // INT8_MAX + + double rmin, rmax, sc, zp; + cv::minMaxIdx(src, &rmin, &rmax); + + // 0 must be present in the range [rmin, rmax] + rmin = std::min(rmin, 0.0); + rmax = std::max(rmax, 0.0); + + sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin); + zp = qmin - (rmin/sc); + + scales.push_back((float)sc); + zeropoints.push_back((int)std::round(zp)); +} + +// FIXIT drop from inference API +Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) +{ + // Net can be quantized only once. + if (netWasQuantized) + CV_Error(Error::StsBadArg, "Cannot quantize a quantized net"); + + CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S"); + CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S"); + + bool originalFusion = fusion; + int prefBackend = preferableBackend; + int prefTarget = preferableTarget; + + // Disable fusions and use CPU backend to quantize net + setPreferableBackend(DNN_BACKEND_OPENCV); + setPreferableTarget(DNN_TARGET_CPU); + enableFusion(false); + + if (calibData.isMat()) + { + setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar()); + } + else if (calibData.isMatVector()) + { + std::vector calibDataVec; + calibData.getMatVector(calibDataVec); + + std::vector inpNames = netInputLayer->outNames; + CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs"); + for (int i = 0; i < calibDataVec.size(); i++) + setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar()); + } + + std::vector outNames = getUnconnectedOutLayersNames(); + std::vector pins; + for (int i = 0; i < outNames.size(); i++) + pins.push_back(getPinByAlias(outNames[i])); + setUpNet(pins); + + // Compute scales and zeropoints for all the layers + std::vector > scales; + std::vector > zeropoints; + for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData& ld = it->second; + if (!ld.skip) + { + Ptr layer = ld.layerInstance; + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + inps[i] = *ld.inputBlobs[i]; + layer->forward(inps, ld.outputBlobs, ld.internals); + } + + std::vector sc; + std::vector zp; + if (ld.type == "TanH") + { + sc.push_back(1.f/128); + zp.push_back(0); + } + else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax") + { + if (ld.params.get("log_softmax", false)) + { + sc.push_back(16.f/256); + zp.push_back(127); + } + else + { + sc.push_back(1.f/256); + zp.push_back(-128); + } + } + else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop") + { + std::vector inp_sc; std::vector inp_zp; + getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp); + sc.assign(ld.outputBlobs.size(), inp_sc[0]); + zp.assign(ld.outputBlobs.size(), inp_zp[0]); + } + else + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + getQuantizationParams(ld.outputBlobs[i], sc, zp); + } + scales.push_back(sc); + zeropoints.push_back(zp); + } + + // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs + // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints + // TODO : Need a different approach. Current solution fails when 2 such layers have the same input layer + for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData& ld = it->second; + // Layers with multiple outputs. Number of outputs is equal to number of inputs + if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || + ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || + ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" || + (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) || /* ReLU with negative slope 0 */ + (ld.type == "Reduce" && (toLowerCase(ld.params.get("reduce")) == "max" || + toLowerCase(ld.params.get("reduce")) == "min"))) + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][i]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i]; + } + } + // Layers with multiple inputs and single output. + else if ((ld.type == "Pooling" && toLowerCase(ld.params.get("pool", "max")) == "max") /* Max Pooling */ || + (ld.type == "Eltwise" && toLowerCase(ld.params.get("operation", "sum")) == "max") /* Elementwise max */ || + ld.type == "Concat") + { + for (int i = 0; i < ld.inputBlobsId.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][0]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0]; + } + } + } + + // Create a new Net and add quantized layers to it. + Net dstNet_; + Net::Impl& dstNet = *(dstNet_.impl); + dstNet.netWasQuantized = true; + dstNet.setInputsNames(netInputLayer->outNames); + dstNet.setPreferableBackend(prefBackend); + dstNet.setPreferableTarget(prefTarget); + dstNet.enableFusion(originalFusion); + + for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData ld = it->second; + if (ld.id == 0) + { + LayerData &quantInpLd = dstNet.layers[0]; + quantInpLd.dtype = inputsDtype; + quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); + continue; + } + + std::vector inpPins = ld.inputBlobsId; + // Fill input and output scales/zeropoints for the layer + std::vector > inp_out_sc(2); + std::vector > inp_out_zp(2); + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + inp_out_sc[0].push_back(scales[pin.lid][pin.oid]); + inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]); + } + inp_out_sc[1] = scales[ld.id]; + inp_out_zp[1] = zeropoints[ld.id]; + + // Quantize layer + Ptr layer = ld.layerInstance; + if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params)) + { + ld.type += "Int8"; + ld.dtype = CV_8S; + } + ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size())); + ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size())); + + // Check and add quantize/dequantize node before layer + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid)); + pin.lid = inpLd.id; + if (inpLd.dtype != ld.dtype) + { + String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid) + : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid); + // Check if quantize/dequantize node for the input layer already exists + if (dstNet.getLayerId(layerName) >= 0) + { + pin.lid = dstNet.getLayerId(layerName); + pin.oid = 0; + } + else + { + LayerParams lp; + lp.set("scales", inp_out_sc[0][i]); + lp.set("zeropoints", inp_out_zp[0][i]); + lp.name = layerName; + lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize"; + int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp); + dstNet.connect(pin.lid, pin.oid, newLid, 0); + pin.lid = newLid; pin.oid = 0; + } + } + } + + // Add quantized layer to Net and connect to its inputs. + int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params); + for( int i = 0; i < inpPins.size(); i++ ) + dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i); + + // If the layer is a output layer, add quantize/dequantize node after it based on output's data type. + if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype) + { + LayerParams lp; + lp.set("scales", inp_out_sc[1][0]); + lp.set("zeropoints", inp_out_zp[1][0]); + lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name; + lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize"; + dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp); + } + } + // Restore FP32 Net's backend, target and fusion + setPreferableBackend(prefBackend); + setPreferableTarget(prefTarget); + enableFusion(originalFusion); + return dstNet_; +} + +// FIXIT drop from inference API +void Net::Impl::getInputDetails(std::vector& scales, std::vector& zeropoints) /*const*/ +{ + if (!netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + LayerParams &lp = layers[0].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } +} + +// FIXIT drop from inference API +void Net::Impl::getOutputDetails(std::vector& scales, std::vector& zeropoints) /*const*/ +{ + if (!netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + std::vector outLayerIds = getUnconnectedOutLayers(); + for (auto &lid : outLayerIds) + { + LayerParams &lp = layers[lid].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 7cfc546b12..2a440a1284 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -79,6 +79,14 @@ class ONNXImporter void expandMid(const std::string& prefix, opencv_onnx::NodeProto& node_proto, const std::string& input, size_t n); void addNegation(const LayerParams& layerParams, opencv_onnx::NodeProto& node_proto, int input_id); + void lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size); + void lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n); + std::string lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n); + std::string lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, + int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, + const int index); + void lstm_add_transform(int num_directions, int batch_size, int hidden_size, + int index, const std::string& input_name, const std::string& output_name); public: ONNXImporter(Net& net, const char *onnxFile); ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer); @@ -122,6 +130,7 @@ private: void parseMaxUnpool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGlobalPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); @@ -1087,7 +1096,7 @@ void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx: addLayer(layerParams, node_proto); } -void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +void ONNXImporter::parseGlobalPool(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; const std::string& layer_type = node_proto.op_type(); @@ -1096,157 +1105,176 @@ void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::Node CV_Assert(node_proto.input_size() == 1); layerParams.type = "Pooling"; String pool; - if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax") + if (layer_type == "GlobalMaxPool") pool = "MAX"; - else if (layer_type == "ReduceSum") - pool = "SUM"; - else + else if (layer_type == "GlobalAveragePool") pool = "AVE"; + else + CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); + + CV_Assert(!layerParams.has("axes")); + layerParams.set("global_pooling", true); layerParams.set("pool", pool); - layerParams.set("global_pooling", !layerParams.has("axes")); - bool keepdims = layerParams.get("keepdims", 1) == 1; - if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + const std::string output_name = node_proto.output(0); + int depth = layerParams.get("depth", CV_32F); + + CV_Assert(node_proto.input_size() <= 2); + String reduceType; + + if (layer_type == "ReduceMax") + reduceType = "MAX"; + else if (layer_type == "ReduceMin") + reduceType = "MIN"; + else if (layer_type == "ReduceSum") + reduceType = "SUM"; + else if (layer_type == "ReduceSumSquare") + reduceType = "SUM_SQUARE"; + else if (layer_type == "ReduceProd") + reduceType = "PROD"; + else if (layer_type == "ReduceL1") + reduceType = "L1"; + else if (layer_type == "ReduceL2") + reduceType = "L2"; + else if (layer_type == "ReduceLogSum") + reduceType = "LOG_SUM"; + else if (layer_type == "ReduceLogSumExp") + reduceType = "LOG_SUM_EXP"; + else if (layer_type == "ReduceMean") + reduceType = "AVE"; + else + CV_Error(Error::StsNotImplemented, "Unsupported Pooling type of " + layer_type + " operation."); + + // The ReduceInt8 can only support "MAX" and "MIN". + if (depth == CV_8S) + { + CV_Assert(reduceType == "MAX" || reduceType == "MIN"); + } + + layerParams.type = (depth == CV_8S) ? "ReduceInt8" : "Reduce"; + layerParams.set("reduce", reduceType); + bool keepdims = layerParams.get("keepdims", 1) == 1; + + if (layer_type == "ReduceSum" && node_proto.input_size() == 2) + { + // TODO support the opset 13 of ReduceSum. + // in opset 13, the ReduceSum has two input, it takes axes as input instead of attribute + // details:https://github.com/onnx/onnx/issues/3420#issuecomment-844295687 + CV_Error(Error::StsNotImplemented, "Unsupported " + layer_type + " operation of opset 13, please try to " + "re-export the onnx model with opset 11."); + } + + MatShape inpShape = outShapes[node_proto.input(0)]; + std::vector shouldDelete(inpShape.size(), false); + + if (layerParams.has("axes")) { - MatShape inpShape = outShapes[node_proto.input(0)]; DictValue axes = layerParams.get("axes"); - MatShape targetShape; - std::vector shouldDelete(inpShape.size(), false); - for (int i = 0; i < axes.size(); i++) { + for (int i = 0; i < axes.size(); i++) + { int axis = normalize_axis(axes.get(i), inpShape.size()); shouldDelete[axis] = true; } - for (int axis = 0; axis < inpShape.size(); ++axis){ - if (!shouldDelete[axis]) - targetShape.push_back(inpShape[axis]); - else if (keepdims) - targetShape.push_back(1); - } - - if (inpShape.size() == 3 && axes.size() <= 2) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - CV_CheckNE(axis, 0, ""); - - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams avgLp; - avgLp.name = layerParams.name + "/avg"; - avgLp.type = "Pooling"; - CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", pool); - if (axes.size() == 2) - { - CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); - CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); - avgLp.set("global_pooling", true); - } - else - { - avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); - avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); - } - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, avgLp.name); - addLayer(avgLp, node_proto); - } - else - { - if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); - - CV_Assert(axes.size() <= inpShape.size() - 2); - std::vector kernel_size(inpShape.size() - 2, 1); - if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - MatShape newShape = inpShape; - newShape[axis + 1] = total(newShape, axis + 1); - newShape.resize(axis + 2); - newShape.insert(newShape.begin(), 2 - axis, 1); - - LayerParams reshapeLp; - reshapeLp.type = "Reshape"; - reshapeLp.name = layerParams.name + "/reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); - - node_proto.set_output(0, reshapeLp.name); - addLayer(reshapeLp, node_proto); - - kernel_size.resize(2); - kernel_size[0] = inpShape[axis]; - node_proto.set_input(0, node_proto.output(0)); - } - else - { - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; - } - } - - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/avg"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); - - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - } - - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); - - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, output_name); } - else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + else { - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - const size_t dims = keepdims ? shapeIt->second.size() : 1; - - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - int newShape[] = {1, 1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/pool"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - - layerParams.type = "Reshape"; - std::vector targetShape(dims, 1); - layerParams.set("dim", DictValue::arrayInt(targetShape.data(), targetShape.size())); - - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, output_name); + for (int i = 0; i < inpShape.size(); i++) + { + shouldDelete[i] = true; + } } + + MatShape targetShape; + for (int i = 0; i < inpShape.size(); ++i) + { + if (!shouldDelete[i]) + { + targetShape.push_back(inpShape[i]); + } + else if (keepdims) + { + targetShape.push_back(1); + } + } + + if (targetShape.empty()) + targetShape.push_back(1); + + // Using PermuteLayer to move the deleted axis to the last. + std::vector perm(inpShape.size(), 0); + for (int i = 0; i < inpShape.size(); i++) + perm[i] = i; + + bool needPermuet = false; + for (int i = 0; i < inpShape.size(); i++) + { + if (shouldDelete[i]) + { + // find the first not deleted element. + std::vector::iterator iter = std::find(shouldDelete.begin() + i, shouldDelete.end(), false); + + if (iter != shouldDelete.end()) + { + int index = iter - shouldDelete.begin(); + + bool temp = shouldDelete[index]; + shouldDelete[index] = shouldDelete[i]; + shouldDelete[i] = temp; + + std::swap(perm[index], perm[i]); + std::swap(inpShape[index], inpShape[i]); + needPermuet = true; + } + else + break; + } + } + + auto inputString= node_proto.input(0); + if (needPermuet) + { + LayerParams permuteLp; + permuteLp.name = layerParams.name + "/permute"; + permuteLp.type = (depth == CV_8S) ? "PermuteInt8" : "Permute"; + permuteLp.set("order", DictValue::arrayInt(perm.data(), perm.size())); + + opencv_onnx::NodeProto protoPermute; + protoPermute.add_input(inputString); + protoPermute.add_output(permuteLp.name); + addLayer(permuteLp, protoPermute); + inputString = permuteLp.name; + } + + std::vector deletedDims; + for (int axis_i = 0; axis_i < inpShape.size(); ++axis_i) + { + if (shouldDelete[axis_i]) + { + deletedDims.push_back(inpShape[axis_i]); + } + } + + LayerParams reduceLp = layerParams; + reduceLp.name = layerParams.name + "/reduce"; + CV_Assert(layer_id.find(reduceLp.name) == layer_id.end()); + reduceLp.set("deleted_dims", DictValue::arrayInt(&deletedDims[0], deletedDims.size())); + + node_proto.set_input(0, inputString); + node_proto.set_output(0, reduceLp.name); + addLayer(reduceLp, node_proto); + + layerParams.type = (depth == CV_8S) ? "ReshapeInt8" : "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); + + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, output_name); + addLayer(layerParams, node_proto); } @@ -1275,13 +1303,12 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP if (axis > 0) { CV_CheckLE(axis, 1024, "Slice layer can't have more than 1024 axes"); // arbitrary limit begin.resize(axis, 0); - end.resize(axis, -1); + end.resize(axis, INT_MAX); } for (int i = 0; i < starts.size(); ++i) { begin.push_back(starts.get(i)); - int finish = ends.get(i); - end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim + end.push_back(ends.get(i)); } } else { // inp_size > 1 CV_Assert(inp_size >= 3); @@ -1305,14 +1332,10 @@ void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeP const int* ends = end_blob.ptr(); if (axis > 0) { begin.resize(axis, 0); - end.resize(axis, -1); + end.resize(axis, INT_MAX); } std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); - for (int i = 0; i < end_blob.total(); ++i) - { - int finish = ends[i]; - end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim - } + std::copy(ends, ends + end_blob.total(), std::back_inserter(end)); if (inp_size == 5) { CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); @@ -1540,103 +1563,284 @@ void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::No addConstant(node_proto.output(0), layerParams.blobs[0]); } -void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +void transformBlobs(std::vector& blobs) { - opencv_onnx::NodeProto node_proto = node_proto_; - const std::string output_name = node_proto.output(0); - LayerParams lstmParams = layerParams; - lstmParams.name += "/lstm"; + Mat Wx = blobs[0]; + Mat Wh = blobs[1]; + Mat b = blobs[2]; + std::vector cudaWorkaround; + cudaWorkaround.push_back(Wx.clone()); + cudaWorkaround.push_back(Wh.clone()); + cudaWorkaround.push_back(b.clone()); - // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() >= 7); - Mat Wx = getBlob(node_proto, 1); - Mat Wh = getBlob(node_proto, 2); - Mat b = getBlob(node_proto, 3); + const int numHidden = Wh.size[2]; - const int numHidden = lstmParams.get("hidden_size"); - const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. - const int numFeatures = Wx.size[2]; - - // Following checks are deduced from the IFGO->IGFO loop below - // Wx is numDirs X numHidden*3 X numFeatures - // Wh is numDirs X numHidden*3 X numHidden - CV_CheckLE(numHidden * 3, Wx.size[1], "Wx should have beat least 3x hidden_size in dimension 1"); - CV_CheckLE(numHidden * 3, Wh.size[1], "Wh should have be at least 3x hidden_size in dimension 1"); - CV_CheckLE(numHidden, Wh.size[2], "Wh should have be at least hidden_size in dimension 2"); - - Mat h0, c0; - if (!node_proto.input(5).empty()) { - h0 = getBlob(node_proto, 5); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - } else { - // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros - h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); - } - if (!node_proto.input(6).empty()) { - c0 = getBlob(node_proto, 6); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); - } else { - // initial_c attribute can be empty in case of keras2onnx producer. fill it with zeros - c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); - } + Mat h0 = blobs[3]; + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + Mat c0 = blobs[4]; + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); b = b.reshape(1, b.size[0]); Mat bx = b.colRange(0, b.cols / 2); Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; - // b is numDirs X numHidden*3 - CV_CheckLE(numHidden * 3, b.cols, "Bias data should have at least 3x hidden_size columns"); + auto toIFOC = [] (Mat& in) { + int first = in.size[0]; + int rest = in.total() / first / 4; + // every weight blob contains weights for Input, Output, Forget and Cell gates + Mat m = in.reshape(1, {first, 4, rest}); + Mat outputGate = m.col(1); + Mat forgetGate = m.col(2); + std::swap_ranges(outputGate.begin(), outputGate.end(), forgetGate.begin()); + }; + + toIFOC(Wx); + toIFOC(Wh); + toIFOC(b); - // IFGO->IGFO - for (int k = 0; k < numDirs; ++k) - { - float* WxData = Wx.ptr(k); - float* WhData = Wh.ptr(k); - float* biasData = b.ptr(k); - for (int j = 0; j < numHidden; ++j) - { - for (int i = 0; i < numFeatures; ++i) - { - std::swap(WxData[(numHidden + j) * numFeatures + i], - WxData[(numHidden * 2 + j) * numFeatures + i]); - } - for (int i = 0; i < numHidden; ++i) - { - std::swap(WhData[(numHidden + j) * numHidden + i], - WhData[(numHidden * 2 + j) * numHidden + i]); - } - std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); - } - } Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + blobs[0] = Wh; + blobs[1] = Wx; + blobs[2] = b.reshape(1, 1); + blobs[3] = h0; + blobs[4] = c0; - lstmParams.blobs.resize(5); - lstmParams.blobs[0] = Wh; - lstmParams.blobs[1] = Wx; - lstmParams.blobs[2] = b; - lstmParams.blobs[3] = h0; - lstmParams.blobs[4] = c0; + if (blobs.size() == 5) { + // so that future patch removing copies can leave all indexing as is + blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); + return; + } - // read direction attribute - lstmParams.set("reverse", lstmParams.get("direction", "") == "reverse"); - lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + Mat P = blobs[5]; + blobs[5] = P.colRange(0, numHidden); + blobs[5] = blobs[5].clone().reshape(1, blobs[5].total()); // Single column. + blobs[5] = Mat::diag(blobs[5]); - node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name - addLayer(lstmParams, node_proto); + blobs.push_back(P.colRange(numHidden, 2 * numHidden)); + blobs[6] = blobs[6].clone().reshape(1, blobs[6].total()); // Single column. + blobs[6] = Mat::diag(blobs[6]); - MatShape lstmShape = outShapes[node_proto.output(0)]; + blobs.push_back(P.colRange(2 * numHidden, 3 * numHidden)); + blobs[7] = blobs[7].clone().reshape(1, blobs[7].total()); // Single column. + blobs[7] = Mat::diag(blobs[7]); - // Add fake 1 as it is done in ONNX - lstmShape.insert(lstmShape.begin() + 1, 1); + // so that future patch removing copies can leave all indexing as is + blobs.insert(blobs.begin(), cudaWorkaround.begin(), cudaWorkaround.end()); +} - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); - node_proto.set_input(0, lstmParams.name); // redirect input to LSTM - node_proto.set_output(0, output_name); // keep origin LSTM's name - addLayer(layerParams, node_proto); +void ONNXImporter::lstm_extractConsts(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, size_t idx, int* blobShape_, int size) +{ + MatShape blobShape(blobShape_, blobShape_ + size); + Mat blob; + if (idx < lstm_proto.input_size() && !lstm_proto.input(idx).empty()) + { + blob = getBlob(lstm_proto, idx); + CV_Assert(shape(blob) == blobShape); + } + else + { + blob = Mat(blobShape, CV_32FC1, 0.); + } + layerParams.blobs.push_back(blob); +}; + +void ONNXImporter::lstm_add_reshape(const std::string& input_name, const std::string& output_name, int* layerShape, size_t n) +{ + LayerParams reshapeLp; + reshapeLp.name = cv::format("%s/reshape", input_name.c_str()); + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + + reshapeLp.set("dim", DictValue::arrayInt(layerShape, n)); + + opencv_onnx::NodeProto reshape_proto; + reshape_proto.add_input(input_name); + reshape_proto.add_output(output_name); + addLayer(reshapeLp, reshape_proto); +}; + +std::string ONNXImporter::lstm_add_slice(int index, const std::string& input_name, int* begin, int* end, size_t n) +{ + LayerParams sliceLP; + sliceLP.name = cv::format("%s/slice_%d", input_name.c_str(), index); + sliceLP.type = "Slice"; + CV_Assert(layer_id.find(sliceLP.name) == layer_id.end()); + + sliceLP.set("begin", DictValue::arrayInt(begin, n)); + sliceLP.set("end", DictValue::arrayInt(end, n)); + sliceLP.set("axis", 0); + + opencv_onnx::NodeProto slice_proto; + slice_proto.add_input(input_name); + slice_proto.add_output(sliceLP.name); + addLayer(sliceLP, slice_proto); + + return slice_proto.output(0); +}; + +std::string ONNXImporter::lstm_fix_dims(LayerParams& layerParams, const opencv_onnx::NodeProto& lstm_proto, + int batch_size, int num_directions, int hidden_size, bool need_y, const std::string& y_name, + const int index) +{ + std::string reshape_output = cv::format("%s/reshape_%d", layerParams.name.c_str(), index); + + // reshape from Seq, Batch, Dirs*Hidden to Seq, Batch, Dirs, Hidden + // to not confuse reshape with dynamic first dimension, zero means 'leave unchanged' + int layerShape[] = {0, batch_size, num_directions, hidden_size}; + lstm_add_reshape(lstm_proto.output(index), reshape_output, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + + // permute from Seq, Batch, Dirs, Hidden to Seq, Dirs, Batch, Hidden + LayerParams permuteLP; + permuteLP.name = reshape_output + "/permute"; + permuteLP.type = "Permute"; + CV_Assert(layer_id.find(permuteLP.name) == layer_id.end()); + + int order[] = {0, 2, 1, 3}; + permuteLP.set("order", DictValue::arrayInt(order, 4)); + + opencv_onnx::NodeProto permute_proto; + permute_proto.add_input(reshape_output); + permute_proto.add_output((need_y && index == 0) ? y_name : static_cast(permuteLP.name)); + addLayer(permuteLP, permute_proto); + + return permute_proto.output(0); +}; + +void ONNXImporter::lstm_add_transform(int num_directions, int batch_size, int hidden_size, + int index, const std::string& input_name, const std::string& output_name) +{ + if (num_directions == 1) + { + // Slice: Yh = Y[-1, :, :, :] + int begin[] = {-1}, end[] = {INT_MAX}; + std::string slice_output = lstm_add_slice(index, input_name, begin, end, sizeof(begin) / sizeof(begin[0])); + + // Reshape: 1x1xBxH -> 1xBxH + int layerShape[] = {1, batch_size, hidden_size}; + lstm_add_reshape(slice_output, output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + } + else + { + // Slice: SxDxBxH -> last sequence, first direction + int begin0[] = {-1, 0}, end0[] = {INT_MAX, 1}; + std::string slice_0 = lstm_add_slice(0, input_name, begin0, end0, sizeof(begin0) / sizeof(begin0[0])); + + // Slice: SxDxBxH -> first sequence, last direction + int begin1[] = {0, -1}, end1[] = {1, INT_MAX}; + std::string slice_1 = lstm_add_slice(1, input_name, begin1, end1, sizeof(begin1) / sizeof(begin1[0])); + + LayerParams concatLP; + concatLP.name = cv::format("%s/concat", input_name.c_str()); + concatLP.type = "Concat"; + CV_Assert(layer_id.find(concatLP.name) == layer_id.end()); + + concatLP.set("axis", 1); // 1x1xBxH -> 1x2xBxH + + opencv_onnx::NodeProto concat_proto; + concat_proto.add_input(slice_0); + concat_proto.add_input(slice_1); + concat_proto.add_output(concatLP.name); + addLayer(concatLP, concat_proto); + + // Reshape: 1x2xBxH -> 2xBxH + int layerShape[] = {2, batch_size, hidden_size}; + lstm_add_reshape(concat_proto.output(0), output_name, layerShape, sizeof(layerShape) / sizeof(layerShape[0])); + } +}; + +void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto lstm_proto = node_proto_; + layerParams.name += "/lstm"; + + // https://github.com/onnx/onnx/blob/main/docs/Operators.md#LSTM + CV_Assert(lstm_proto.input_size() >= 3); + for (size_t i = 1; i < 3; ++i) + { + const std::string& name = lstm_proto.input(i); + CV_Assert(!name.empty() && constBlobs.count(name) == 1); + } + + IterShape_t shapeIt = outShapes.find(lstm_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + const MatShape x_shape = shapeIt->second; + + const int seq_length = x_shape[0]; + const int batch_size = x_shape[1]; + const int input_size = x_shape[2]; + const int hidden_size = layerParams.get("hidden_size"); + const int num_directions = constBlobs[lstm_proto.input(1)].size[0]; + + int w_size[] = {num_directions, 4*hidden_size, input_size}; + lstm_extractConsts(layerParams, lstm_proto, 1, w_size, sizeof(w_size) / sizeof(w_size[0])); // W + + int r_size[] = {num_directions, 4*hidden_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 2, r_size, sizeof(r_size) / sizeof(r_size[0])); // R + + int b_size[] = {num_directions, 8*hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 3, b_size, sizeof(b_size) / sizeof(b_size[0])); // B + + if (4 < lstm_proto.input_size() && !lstm_proto.input(4).empty()) + { + Mat blob = getBlob(lstm_proto, 4); + CV_Assert(blob.total() == batch_size); + for (MatIterator_ it = blob.begin(); it != blob.end(); ++it) + { + CV_Assert(*it == seq_length); + } + } + + int h_size[] = {num_directions, batch_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 5, h_size, sizeof(h_size) / sizeof(h_size[0])); // initial_h + + int c_size[] = {num_directions, batch_size, hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 6, c_size, sizeof(c_size) / sizeof(c_size[0])); // initial_c + + if (lstm_proto.input_size() > 7 && !lstm_proto.input(7).empty()) + { + layerParams.set("use_peephole", true); + int p_size[] = {num_directions, 3 * hidden_size}; + lstm_extractConsts(layerParams, lstm_proto, 7, p_size, sizeof(p_size) / sizeof(p_size[0])); // P + } + + transformBlobs(layerParams.blobs); + + layerParams.set("is_onnx", true); + layerParams.set("reverse", layerParams.get("direction", "") == "reverse"); + layerParams.set("bidirectional", layerParams.get("direction", "") == "bidirectional"); + + bool need_yc = lstm_proto.output_size() > 2 && !lstm_proto.output(2).empty(); + bool need_yh = lstm_proto.output_size() > 1 && !lstm_proto.output(1).empty(); + bool need_y = lstm_proto.output_size() > 0 && !lstm_proto.output(0).empty(); + + const std::string y_name = need_y ? lstm_proto.output(0) : ""; + const std::string yh_name = need_yh ? lstm_proto.output(1) : ""; + const std::string yc_name = need_yc ? lstm_proto.output(2) : ""; + + layerParams.set("produce_cell_output", need_yc); + + lstm_proto.clear_output(); + if (need_y || need_yh) + { + // give random names to LSTMLayer's outputs because every output needs postprocessing + lstm_proto.add_output(cv::format("%s_y", layerParams.name.c_str())); + } + if (need_yc) + { + lstm_proto.add_output(yc_name); + } + + addLayer(layerParams, lstm_proto); + + std::string y_output = lstm_fix_dims(layerParams, lstm_proto, batch_size, num_directions, hidden_size, need_y, + y_name, 0); + if (need_yh) + { + lstm_add_transform(num_directions, batch_size, hidden_size, 0, y_output, yh_name); + } } void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) @@ -2485,9 +2689,15 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node if (!haveVariables) { - if (broadcast_axes.size() != 1) + if (broadcast_axes.size() > 1) CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); + if (broadcast_axes.empty()) + { + addConstant(output_name, getBlob(node_proto, 0)); + return; + } + Mat input = getBlob(node_proto, 0); input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); @@ -2708,7 +2918,7 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node sliceLp.type = "Slice"; sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name; std::vector begin(inpShape.size(), 0); - std::vector end(inpShape.size(), -1); + std::vector end(inpShape.size(), INT_MAX); begin[axis] = index; end[axis] = index + 1; @@ -3049,6 +3259,7 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP layerParams.type = "ConvolutionInt8"; layerParams.set("num_output", outCn); layerParams.set("input_zeropoint", inp_zp.at(0)); + layerParams.set("input_scale",inp_sc.at(0)); layerParams.blobs.push_back(weights); layerParams.blobs.push_back(biasFused); layerParams.blobs.push_back(outputMultiplier); @@ -3088,6 +3299,9 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod layerParams.type = "InnerProductInt8"; layerParams.set("num_output", outCn); layerParams.set("axis", firstInpDims - secondInpDims + 1); + layerParams.set("input_scale", inp_sc.at(0)); + layerParams.set("input_zeropoint", inp_zp.at(0)); + layerParams.blobs.push_back(weights); layerParams.blobs.push_back(bias); layerParams.blobs.push_back(outputMultiplier); @@ -3158,6 +3372,7 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); + rescaleParams.set("isEltwise", true); addLayer(rescaleParams, node_proto); return; } @@ -3206,7 +3421,6 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No Mat blob_dequantized; blob.convertTo(blob_dequantized, CV_32F, inp_scales[1], -(inp_scales[1] * inp_zps[1])); layerParams.blobs.push_back(blob_dequantized); - layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); } } } @@ -3221,9 +3435,9 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No { layerParams.type = "ScaleInt8"; layerParams.set("bias_term", op == "sum"); - layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); } + layerParams.set("input_scales", DictValue::arrayReal(inp_scales.data(), inp_scales.size())); layerParams.set("input_zeropoints", DictValue::arrayInt(inp_zps.data(), inp_zps.size())); addLayer(layerParams, node_proto); } @@ -3249,6 +3463,9 @@ void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx:: } layerParams.type = "ReLUInt8"; + layerParams.set("input_scale", inp_sc); + layerParams.set("input_zeropoint", inp_zp); + layerParams.set("slope", slope); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } @@ -3273,6 +3490,8 @@ void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::No } layerParams.type = "SigmoidInt8"; + layerParams.set("input_scale", inp_sc); + layerParams.set("input_zeropoint", inp_zp); layerParams.blobs.push_back(lookUpTable); addLayer(layerParams, node_proto); } @@ -3326,6 +3545,7 @@ void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::Nod rescaleParams.set("depth", CV_8S); rescaleParams.set("scale", scale); rescaleParams.set("shift", shift); + rescaleParams.set("isEltwise", false); opencv_onnx::NodeProto proto; proto.add_input(node_proto.input(i)); @@ -3405,8 +3625,10 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) dispatch["MaxUnpool"] = &ONNXImporter::parseMaxUnpool; dispatch["MaxPool"] = &ONNXImporter::parseMaxPool; dispatch["AveragePool"] = &ONNXImporter::parseAveragePool; - dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = - dispatch["ReduceMax"] = &ONNXImporter::parseReduce; + dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = &ONNXImporter::parseGlobalPool; + dispatch["ReduceMax"] = dispatch["ReduceMin"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = dispatch["ReduceMax"] = + dispatch["ReduceMin"] = dispatch["ReduceSumSquare"] = dispatch["ReduceProd"] = dispatch["ReduceL1"] = + dispatch["ReduceL2"] = dispatch["ReduceLogSum"] = dispatch["ReduceLogSumExp"] = &ONNXImporter::parseReduce; dispatch["Slice"] = &ONNXImporter::parseSlice; dispatch["Split"] = &ONNXImporter::parseSplit; dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias; @@ -3453,8 +3675,8 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", - "Identity", "Log", "Round", "Selu", "Sigmoid", "Sin", "Sinh", "Softmax", - "Softplus", "Softsign", "Sqrt", "Tan", "ThresholdedRelu"}; + "Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax", + "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"}; for (const auto& name : simpleLayers) { dispatch[name] = &ONNXImporter::parseSimpleLayers; diff --git a/modules/dnn/src/op_cuda.cpp b/modules/dnn/src/op_cuda.cpp new file mode 100644 index 0000000000..a1b588ecfb --- /dev/null +++ b/modules/dnn/src/op_cuda.cpp @@ -0,0 +1,106 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#ifdef HAVE_CUDA +#include "op_cuda.hpp" +#include "cuda4dnn/init.hpp" +#include "net_impl.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::initCUDABackend(const std::vector& blobsToKeep_) +{ + CV_Assert(preferableBackend == DNN_BACKEND_CUDA); + + if (!cudaInfo) /* we need to check only once */ + cuda4dnn::checkVersions(); + + if (cuda4dnn::getDeviceCount() <= 0) + CV_Error(Error::StsError, "No CUDA capable device found."); + + if (cuda4dnn::getDevice() < 0) + CV_Error(Error::StsError, "No CUDA capable device selected."); + + if (!cuda4dnn::isDeviceCompatible()) + CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); + + if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) + { + CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); + preferableTarget = DNN_TARGET_CUDA; + } + + if (!cudaInfo) + { + cuda4dnn::csl::CSLContext context; + context.stream = cuda4dnn::csl::Stream(true); + context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream); + context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream); + + auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers + cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); + } + + cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any + + for (auto& layer : layers) + { + auto& ld = layer.second; + if (ld.id == 0) + { + for (auto& wrapper : ld.inputBlobsWrappers) + { + auto cudaWrapper = wrapper.dynamicCast(); + cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); + } + } + + for (auto& wrapper : ld.outputBlobsWrappers) + { + auto cudaWrapper = wrapper.dynamicCast(); + cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream); + } + } + + for (auto& layer : layers) + { + auto& ld = layer.second; + auto& layerInstance = ld.layerInstance; + + if (!layerInstance->supportBackend(DNN_BACKEND_CUDA)) + { + std::ostringstream os; + os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name + << "\" of type " << ld.type << '\n'; + CV_LOG_INFO(NULL, os.str().c_str()); + continue; + } + + /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */ + auto context = cudaInfo->context; + auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers); + ld.backendNodes[DNN_BACKEND_CUDA] = node; + + auto cudaNode = node.dynamicCast(); + cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes()); + } + + if (blobsToKeep_.size() > 1) + { + for (const auto& pin : blobsToKeep_) + { + LayerData& ld = layers[pin.lid]; + ld.cudaD2HBackgroundTransfers.push_back(pin.oid); + } + } +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn +#endif // HAVE_CUDA diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp index c96971bc6a..653de36146 100644 --- a/modules/dnn/src/op_halide.cpp +++ b/modules/dnn/src/op_halide.cpp @@ -8,15 +8,135 @@ #include "precomp.hpp" #include #include "op_halide.hpp" +#include "net_impl.hpp" #ifdef HAVE_HALIDE +#include "halide_scheduler.hpp" + #include #endif // HAVE_HALIDE -namespace cv +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::setHalideScheduler(const String& scheduler) { -namespace dnn + halideConfigFile = scheduler; +} + + +#ifdef HAVE_HALIDE + + +void Net::Impl::compileHalide() { + CV_TRACE_FUNCTION(); + + CV_Assert(preferableBackend == DNN_BACKEND_HALIDE); + + HalideScheduler scheduler(halideConfigFile); + std::vector< std::reference_wrapper > compileList; compileList.reserve(64); + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + Ptr layer = ld.layerInstance; + if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip) + { + CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty()); + bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]); + if (!scheduled) + { + // Use automatic scheduling provided by layer. + layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE], + ld.inputBlobs, ld.outputBlobs, + preferableTarget); + } + compileList.emplace_back(ld); + } + } + std::atomic progress(0); + auto fn = ([&] () -> void + { + for (;;) + { + int id = progress.fetch_add(1); + if ((size_t)id >= compileList.size()) + return; + const LayerData& ld = compileList[id].get(); + Ptr node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second; + dnn::compileHalide(ld.outputBlobs, node, preferableTarget); + } + }); + size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency()); + num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads)); + std::vector threads(num_threads - 1); + for (auto& t: threads) t = std::thread(fn); + fn(); // process own tasks + for (auto& t: threads) t.join(); +} + + +void Net::Impl::initHalideBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide()); + + // Iterator to current layer. + MapIdToLayerData::iterator it = layers.begin(); + // Iterator to base layer for fusion. In example, in case of conv+bn+relu + // it'll be a conv layer. + MapIdToLayerData::iterator baseIt = layers.begin(); + for (; it != layers.end(); it++) + { + LayerData &ldTop = it->second; + Ptr layerTop = ldTop.layerInstance; + if (!layerTop->supportBackend(preferableBackend)) + { + // Move base iterator to layer that don't support preferable + // backend to prevent fusion over layer of different backend. + baseIt = it; + continue; + } + // Try to do layers fusion. + LayerData &ldBot = baseIt->second; + Ptr layerBot = ldBot.layerInstance; + // 1. Check that bottom and top from the same backends. + if (it != layers.begin() && layerBot->supportBackend(preferableBackend)) + { + // 2. Check that current layer works in-place. + bool inPlace = ldTop.inputBlobs.size() == 1 && + ldBot.outputBlobs.size() == 1 && + ldTop.inputBlobs[0]->data == + ldBot.outputBlobs[0].data; + if (inPlace) + { + // 3. Try to attach node. + CV_Assert(!ldBot.backendNodes[preferableBackend].empty()); + Ptr fusedNode = + layerTop->tryAttach(ldBot.backendNodes[preferableBackend]); + if (!fusedNode.empty()) + { + ldTop.skip = true; + ldBot.backendNodes[preferableBackend] = fusedNode; + ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers; + continue; + } + } + } + // No layers fusion. + ldTop.skip = false; + ldTop.backendNodes[DNN_BACKEND_HALIDE] = + layerTop->initHalide(ldTop.inputBlobsWrappers); + baseIt = it; + } +} + + +#endif // HAVE_HALIDE +CV__DNN_INLINE_NS_END + #ifdef HAVE_HALIDE static MatShape getBufferShape(const MatShape& shape) @@ -226,5 +346,83 @@ bool haveHalide() #endif // HAVE_HALIDE } -} // namespace dnn -} // namespace cv + +CV__DNN_INLINE_NS_BEGIN + + +void Layer::applyHalideScheduler(Ptr& node, const std::vector &inputs, + const std::vector &outputs, int targetId) const +{ +#ifndef HAVE_HALIDE + CV_Error(Error::StsNotImplemented, ""); +#else + CV_TRACE_FUNCTION(); + + Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"), + xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile"); + Halide::Func& top = node.dynamicCast()->funcs.back(); + + int outW, outH, outC, outN; + getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN); + + if (targetId == DNN_TARGET_CPU) + { + if (outW == 1 && outH == 1) + { + if (outC + outN == 1) + return; + + if (outC > 8) + top.split(c, co, ci, 8) + .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) + .parallel(tile) + .vectorize(ci, 8); + else + top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile) + .parallel(tile); + } + else + { + if (outH > 2) + { + top.reorder(x, c, y) + .split(y, yo, yi, 2) + .fuse(yo, n, tile) + .parallel(tile) + .unroll(yi) + .vectorize(x, outW >= 16 ? 16 : outW); + } + } + } + else if (targetId == DNN_TARGET_OPENCL) + { + if (outW == 1 && outH == 1) + { + int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC; + top.split(c, co, ci, c_split) + .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile) + .gpu_blocks(tile) + .gpu_threads(ci); + } + else + { + int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW; + int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH; + // Supported vectorization widths: 2, 3, 4, 8, 16 + int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC); + top.split(x, xo, xi, x_split).split(y, yo, yi, y_split) + .split(c, co, ci, c_split) + .gpu_blocks(xo, yo, co) + .gpu_threads(xi, yi) + .reorder(xi, yi, ci, xo, yo, co) + .vectorize(ci); + } + } + else + CV_Error(Error::StsNotImplemented, "Unknown target identifier"); +#endif // HAVE_HALIDE +} + + +CV__DNN_INLINE_NS_END +}} // namespace diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 2899545c6d..8a27dc2221 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -254,6 +254,31 @@ cv::String getInferenceEngineCPUType() return cpu_type; } + +namespace openvino { + +bool checkTarget(Target target) +{ + // Lightweight detection + const std::vector devices = getCore("").GetAvailableDevices(); + for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) + { + if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD) + return true; + if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL) + return true; + else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA) + return true; + else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU) + return true; + else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + return true; + } + return false; +} + +} // namespace openvino + #else // HAVE_INF_ENGINE cv::String getInferenceEngineBackendType() diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index ed1323d7dd..856441e71d 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -73,6 +73,13 @@ void infEngineBlobsToMats(const std::vector& blobs, CV__DNN_INLINE_NS_BEGIN +namespace openvino { + +// TODO: use std::string as parameter +bool checkTarget(Target target); + +} // namespace openvino + bool isMyriadX(); bool isArmComputePlugin(); diff --git a/modules/dnn/src/op_timvx.cpp b/modules/dnn/src/op_timvx.cpp new file mode 100644 index 0000000000..107d660d39 --- /dev/null +++ b/modules/dnn/src/op_timvx.cpp @@ -0,0 +1,931 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019-2021, Shenzhen Institute of Artificial Intelligence and +// Robotics for Society, all rights reserved. +// Third party copyrights are property of their respective owners. + +#include "precomp.hpp" +#include +#include "op_timvx.hpp" +#include "net_impl.hpp" + +namespace cv +{ +namespace dnn +{ +#ifdef HAVE_TIMVX + +CV__DNN_INLINE_NS_BEGIN + +// update all comsumer +void Net::Impl::tvUpdateConfictMap(int graphIndex, LayerData& ld, std::vector >& graphConflictMap) +{ + if (ld.consumers.empty()) + return; + for (int i = 0; i < ld.consumers.size(); i++) + { + LayerData &consumerld = layers[ld.consumers[i].lid]; + std::vector::iterator it = std::find(graphConflictMap[ld.consumers[i].lid].begin(), + graphConflictMap[ld.consumers[i].lid].end(), graphIndex); + + if (it == graphConflictMap[ld.consumers[i].lid].end()) + { + graphConflictMap[ld.consumers[i].lid].push_back(graphIndex); + tvUpdateConfictMap(graphIndex, consumerld, graphConflictMap); + } + else + continue; + } +} + +// Convert TRANSIENT to OUTPUT +void Net::Impl::tvConvertToOutputNode(const LayerData& ld, Ptr& targetWrap) +{ + // find right layer. + for (auto& inputLayerId : ld.inputLayersId) + { + LayerData &inputld = layers[inputLayerId]; + auto itWrap = std::find(inputld.outputBlobsWrappers.begin(), + inputld.outputBlobsWrappers.end(), targetWrap); + if (itWrap != inputld.outputBlobsWrappers.end()) + { + auto outputWrap = (*itWrap).dynamicCast(); + if (!outputWrap->isTensor()) + continue; + + auto inputNode = inputld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + if (!inputNode->isLast && inputNode->opIndex != -1) + { + CV_Assert(outputWrap->getTensorAttr() == tim::vx::TRANSIENT); + // set last + inputNode->isLast = true; + + auto shapeType = getShapeTypeFromMat(outputWrap->getMat()); + auto outQuant = outputWrap->getTensorQuantization(); + + outputWrap->setTensorShape(shapeType); + outputWrap->createTensor(inputNode->tvGraph->graph, + tim::vx::TensorAttribute::OUTPUT, outQuant); + int outIndex = inputNode->tvGraph->addWrapper(outputWrap); + inputNode->outputIndexList.clear(); + inputNode->outputIndexList.push_back(outIndex); + } + } + } +} + +void Net::Impl::initTimVXBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert(preferableBackend == DNN_BACKEND_TIMVX); + + // Build TimVX Graph from sets of layers that support this TimVX backend. + // Split a whole model on several TimVX Graph if some of layers are not implemented by TimVX backend. + if (!haveTimVX()) + return; + + // Allocate graphConflictMap + if (timVxInfo.graphConflictMap.empty()) + timVxInfo.graphConflictMap.resize(layers.size()); + + auto it = layers.begin(); + bool isLast = false; // If the node is the last node in current tvGraph. + + for (; it != layers.end(); it++) + { + isLast = false; + LayerData &ld = it->second; + if(ld.skip) + continue; + Ptr layer = ld.layerInstance; + if (!layer->supportBackend(preferableBackend)) + { + continue; + } + + // If layer consumers are more than one, set isLast true. + // For now, TimVX backend divides multiple branchs into multiple tvGraph. + if (ld.consumers.size() == 0) + { + isLast = true; + } + else if(ld.consumers.size() == 1) + { + LayerData* consumerld = &layers[ld.consumers[0].lid]; + + while (consumerld) + { + if (consumerld->skip) + { + if (consumerld->consumers.size() == 1) + { + int nextLayerId = consumerld->consumers[0].lid; + consumerld = &layers[nextLayerId]; + } + else + { + isLast = true; + break; + } + } + else + { + break; + } + } + Ptr& consumerLayer = consumerld->layerInstance; + + if (!isLast && !consumerLayer->supportBackend(preferableBackend)) + { + isLast = true; + } + } + else + { + // If there are is multiple input, and only one of them is supported. + int tvSupportNum = 0; + for (int i = 0; iskip) + { + if (consumerld->consumers.size() == 1) + { + int nextLayerId = consumerld->consumers[0].lid; + consumerld = &layers[nextLayerId]; + } + else + { + isLast = true; + break; + } + } + else + { + break; + } + } + Ptr& consumerLayer = consumerld->layerInstance; + + if (consumerLayer->supportBackend(preferableBackend)) + { + tvSupportNum++; + } + } + + if (tvSupportNum != 1) + isLast = true; + } + + int graphIndex = -1; + bool needRecorrect = !timVxInfo.findGraphIndex(ld.inputBlobsWrappers, graphIndex); + + if (graphIndex != -1 && !needRecorrect) + { + needRecorrect = timVxInfo.isConflict(ld.id, graphIndex); + } + + // Recorrect the input layer. + if (needRecorrect) + { + // set all inputLayers' as last layer, and convert TRANSIENT to output. + for (int i = 0; i < ld.inputBlobsWrappers.size(); i++) + { + auto inputWrap = ld.inputBlobsWrappers[i]; + auto tvInputWrap = inputWrap.dynamicCast(); + if (!tvInputWrap->isTensor()) + continue; + + auto attr = tvInputWrap->getTensorAttr(); + if (attr == tim::vx::TensorAttribute::OUTPUT) + { + continue; + } + else if (attr == tim::vx::TensorAttribute::INPUT) + { + Mat matTmp = tvInputWrap->getMat(); + tvInputWrap = Ptr(new TimVXBackendWrapper(matTmp)); + + } + else if (attr == tim::vx::TensorAttribute::TRANSIENT) + { + tvConvertToOutputNode(ld, tvInputWrap); + // updateConflictMap + tvUpdateConfictMap(graphIndex, ld, timVxInfo.graphConflictMap); + } + } + graphIndex = -1; + } + + if (graphIndex == -1) + { + graphIndex = timVxInfo.createGraph(); + } + timVxInfo.setTmpGraphIndex(graphIndex); + + ld.backendNodes[DNN_BACKEND_TIMVX] = + layer->initTimVX(&timVxInfo, ld.inputBlobsWrappers, ld.outputBlobsWrappers, isLast); + + // post process, create last node correctly. + if (isLast && ld.backendNodes[DNN_BACKEND_TIMVX]) + { + auto tmpNode = ld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + tmpNode->isLast = true; + // update graphConflictMap + tvUpdateConfictMap(graphIndex, ld, timVxInfo.graphConflictMap); + } + + // post process for failing to create timvx Node. + if (!ld.backendNodes[DNN_BACKEND_TIMVX]) + { + for (int i = 0; i < ld.inputBlobsWrappers.size(); i++) + { + auto inputWrap = ld.inputBlobsWrappers[i]; + auto tvInputWrap = inputWrap.dynamicCast(); + if (!tvInputWrap->isTensor()) + continue; + + auto attr = tvInputWrap->getTensorAttr(); + if (attr == tim::vx::TensorAttribute::TRANSIENT) + { + tvConvertToOutputNode(ld, tvInputWrap); + } + } + } + } + + // Op Binding + it = layers.begin(); + Ptr node; + std::vector > tmpGrapList; + for (; it != layers.end(); it++) + { + LayerData &ld = it->second; + + if (ld.backendNodes[DNN_BACKEND_TIMVX]) + node = ld.backendNodes[DNN_BACKEND_TIMVX].dynamicCast(); + else + continue; + + // Binding tvTensor and tvOp + if (node->opIndex >= 0) + node->opBinding(); + } +} + +CV__DNN_INLINE_NS_END + +// from CPU to NPU +bool copyToTensor(std::shared_ptr &dst, const Mat &src) +{ + CV_Assert(src.isContinuous() && (src.type() == CV_8S || src.type() == CV_32F)); + if (dst->CopyDataToTensor(src.data, src.total())) + { + return true; + } + else + return false; +} + +// from NPU to CPU +bool copyToMat(const Mat &dst, std::shared_ptr &src) +{ + CV_Assert(dst.isContinuous() && (dst.type() == CV_8S || dst.type() == CV_32F)); + if (src->CopyDataFromTensor(dst.data)) + { + return true; + } + else + return false; +} + +tvActivationType getTimVXActType(String & actString) +{ + if (actString == "ReLUInt8") return tvActReLU; + if (actString == "ReLU6Int8") return tvActReLU6; + if (actString == "TanHInt8") return tvActTanH; + if (actString == "SwishInt8") return tvActSwish; + if (actString == "MishInt8") return tvActMish; + if (actString == "SigmoidInt8") return tvActSigmoid; + if (actString == "ELUInt8") return tvActELU; + + return tvActNotSupported; +} + +tim::vx::ShapeType getShapeTypeFromMat(const Mat& mat, bool ifConst) +{ + /* Convert Mat shape to TimVX Tensor shape. + DataLayout in TimVX is WHCN, while NCHW in OpenCV. + So we do vector reverse. + */ + CV_Assert(!mat.empty()); + tim::vx::ShapeType tvInputShape; + auto matShape = shape(mat); + tvInputShape.assign(matShape.begin(), matShape.end()); + + if ( matShape.size() > 1 ) // TODO: check when we need reverse the shape vector. + { + if (ifConst && tvInputShape.size() == 2 && tvInputShape[1] == 1) + { // if bias vector, shape [n, 1] to [n]. + tvInputShape.resize(1); + } + else + std::reverse(tvInputShape.begin(), tvInputShape.end()); + } + return tvInputShape; +} + +bool getQuantType(const std::vector& scales, int numOutput) +{ + CV_Assert(!scales.empty()); + if (numOutput == -1) + { + numOutput = scales.size(); + } + bool tvSymmetric = false; + + for (int i =1; i < numOutput; i++) + { + if (std::abs(scales[0] - scales[i]) > std::numeric_limits::epsilon()) + { + tvSymmetric = true; + break; + } + } + + return tvSymmetric; +} + +// convert mat Depth to tensorDataType +tim::vx::DataType dataTypeConvert(int matDepth) +{ + tim::vx::DataType tensorDataType; + switch(matDepth) + { + case CV_8U: + { + tensorDataType = tim::vx::DataType::UINT8; + break; + } + case CV_8S: + { + tensorDataType = tim::vx::DataType::INT8; + break; + } + case CV_16U: + { + tensorDataType = tim::vx::DataType::UINT16; + break; + } + case CV_16S: + { + tensorDataType = tim::vx::DataType::INT16; + break; + } + case CV_32S: + { + tensorDataType = tim::vx::DataType::INT32; + break; + } + case CV_32F: + { + tensorDataType = tim::vx::DataType::FLOAT32; + break; + } + case CV_16F: + { + tensorDataType = tim::vx::DataType::FLOAT16; + break; + } + default: + { + tensorDataType = tim::vx::DataType::UNKNOWN; + break; + } + } + return tensorDataType; +} + +std::vector > getWrappers(const std::vector wrappersIndex, + Ptr tvGraph) +{ + std::vector > wrappers; + for (int i = 0; igetWrapper(wrappersIndex[i]); + if (wrapper) + wrappers.push_back(wrapper); + } + + return wrappers; +} + +// *********************** TimVXGraph ******************** +TimVXGraph::TimVXGraph() +{ + // new TimVX Graph + context = tim::vx::Context::Create(); + graph = context->CreateGraph(); + isCompiled = false; +} + +TimVXGraph::~TimVXGraph() +{ + + // release opList + for (auto& tensor: tensorList) + tensor.reset(); + + // release tensorList + for (auto& op: opList) + op.reset(); + + // release graph + graph.reset(); + + // release context + context.reset(); +} + +std::shared_ptr TimVXGraph::getOp(const int opIndex) +{ + CV_Assert(0 <= opIndex && !opList.empty() && opIndex < opList.size()); + return opList[opIndex]; +} + +int TimVXGraph::addWrapper(Ptr& tensorWrapper) +{ + CV_Assert(tensorWrapper->isTensor()); + tim::vx::TensorAttribute tensorAttr = tensorWrapper->getTensorAttr(); + + wrapperList.push_back(tensorWrapper); + tensorList.push_back(tensorWrapper->getTensor()); + int wrapperIndex = wrapperList.size() -1; + + if (tensorAttr == tim::vx::TensorAttribute::INPUT) + { + inputWrappersIndex.push_back(wrapperIndex); + } + + if (tensorAttr == tim::vx::TensorAttribute::OUTPUT) + { + outputWrappersIndex.push_back(wrapperIndex); + } + + return wrapperIndex; +} + +Ptr TimVXGraph::getWrapper(int wrapperIndex) +{ + CV_Assert(wrapperIndex>=0 && wrapperIndex < wrapperList.size()); + return wrapperList[wrapperIndex]; +} + +int TimVXGraph::addOp(const std::shared_ptr& op) +{ + CV_Assert(op); + opList.emplace_back(op); + return opList.size()-1; +} + +int TimVXGraph::getTensorIndex(const std::shared_ptr& tensor) +{ + auto it = find(tensorList.begin(), tensorList.end(), tensor); + if (it != tensorList.end()) + return it - tensorList.begin(); + else + return -1; +} + +void TimVXGraph::forward() +{ + CV_Assert(!inputWrappersIndex.empty() && !outputWrappersIndex.empty()); + + // Every TimVXGraph Instance only compiles once. + if (!this->isCompiled) + { + if (!graph->Compile()) + CV_Error(cv::Error::StsBadArg, " Fail to compile TimVX graph!"); + this->isCompiled = true; + } + + if (!graph->Run()) + CV_Error(cv::Error::StsBadArg, " Fail to run TimVX graph!"); +} + +// *********************** TimVXBackendNode ******************** +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_): BackendNode(DNN_BACKEND_TIMVX) +{ + opIndex = -1; + tvGraph = tvGraph_; + isLast = false; +} + +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_, + const std::shared_ptr& op_): BackendNode(DNN_BACKEND_TIMVX) +{ + tvGraph = tvGraph_; + opIndex = tvGraph->addOp(op_); + isLast = false; +} + +TimVXBackendNode::TimVXBackendNode(const Ptr& tvGraph_, std::shared_ptr& op_, + std::vector& inputsIndex, std::vector& outpusIndex) + :BackendNode(DNN_BACKEND_TIMVX) +{ + tvGraph = tvGraph_; + opIndex = tvGraph->addOp(op_); + isLast = false; + + if (!inputsIndex.empty()) + inputIndexList.assign(inputsIndex.begin(), inputsIndex.end()); + + if (!outpusIndex.empty()) + outputIndexList.assign(outpusIndex.begin(), outpusIndex.end()); +} + +bool TimVXBackendNode::opBinding() +{ + if (!tvGraph || tvGraph->isCompiled || opIndex == -1) + return false; + + std::shared_ptr op = tvGraph->getOp(opIndex); + + if (!inputIndexList.empty()) + { + std::vector > inputsWrapper = getWrappers(inputIndexList, tvGraph); + // Binding input Tensor. + for (auto& warpper: inputsWrapper) + { + op->BindInput(warpper->getTensor()); + } + } + + if (!outputIndexList.empty()) + { + std::vector > outputsWrapper = getWrappers(outputIndexList, tvGraph); + for (auto& warpper: outputsWrapper) + { + op->BindOutput(warpper->getTensor()); + } + } + return true; +} + +void TimVXBackendNode::setInputTensor() +{ + if (!tvGraph || opIndex == -1) + return; + + if (!inputIndexList.empty()) + { + std::vector > inputsWrapper = getWrappers(inputIndexList, tvGraph); + + // Binding input Tensor. + for (auto& warpper: inputsWrapper) + { + if (warpper->getTensorAttr() == tim::vx::TensorAttribute::INPUT) + { + warpper->setHostDirty(); + warpper->copyToDevice(); + } + } + } +} + +// *********************** TimVXBackendWrapper ******************** +// Default Constructor +TimVXBackendWrapper::TimVXBackendWrapper() : BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + isTensor_ = false; + deviceDirty = false; + hostDirty = false; + tensorType = tim::vx::DataType::UNKNOWN; + tensorShape = {}; + tensorIndex = -1; + tensorAttr = tim::vx::TensorAttribute::CONSTANT; +} + +TimVXBackendWrapper::TimVXBackendWrapper(Mat& m) : BackendWrapper(DNN_BACKEND_TIMVX, + DNN_TARGET_NPU) +{ + host = m; + isTensor_ = false; + deviceDirty = false; + hostDirty = true; + tensorType = dataTypeConvert(m.type()); + tensorShape = {}; + tensorIndex = -1; + tensorAttr = tim::vx::TensorAttribute::CONSTANT; + + // TODO: unsupported data by TimVX should run convert function first. + CV_Assert(tensorType != tim::vx::DataType::UNKNOWN); +} + +TimVXBackendWrapper::TimVXBackendWrapper(const Ptr& baseBuffer, Mat& m) + :BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + Ptr base = baseBuffer.dynamicCast(); + CV_Assert(!base.empty()); + tensor = base->tensor; + isTensor_ = base->isTensor_; + tensorIndex = base->tensorIndex; + tensorType = base->tensorType; + tensorAttr = base->tensorAttr; + tensorShape = base->tensorShape; + deviceDirty = base->deviceDirty; + hostDirty = base->hostDirty; + host = m; +} + +TimVXBackendWrapper::TimVXBackendWrapper(std::shared_ptr& tensor_) + :BackendWrapper(DNN_BACKEND_TIMVX, DNN_TARGET_NPU) +{ + tensor = tensor_; + isTensor_ = true; + deviceDirty = true; + hostDirty = false; + tensorType = tensor_->GetDataType(); // getTensor DataType. + tensorAttr = tensor_->GetSpec().attr_; // getTensor Attribution. + tensorShape = tensor_->GetShape(); + tensorIndex = -1; +} + +void TimVXBackendWrapper::setTensorShape(const tim::vx::ShapeType & matShape) +{ + CV_Assert(!matShape.empty()); + tensorShape.assign(matShape.begin(), matShape.end()); +} + +int TimVXBackendWrapper::getTensorIndex() +{ + CV_Assert(isTensor_); + return tensorIndex; +} + +tim::vx::TensorAttribute TimVXBackendWrapper::getTensorAttr() +{ + CV_Assert(isTensor_); + return tensorAttr; +} + +// Create tensor +void TimVXBackendWrapper::createTensor(std::shared_ptr& graph, + tim::vx::TensorAttribute tensorAttribute) +{ + Ptr epmtyQuant = nullptr; + return this->createTensor(graph, tensorAttribute, epmtyQuant); +} + +// Create tensor +void TimVXBackendWrapper::createTensor(std::shared_ptr& graph, + tim::vx::TensorAttribute tensorAttribute, Ptr& tvQuant) +{ + CV_Assert(graph); + tim::vx::TensorSpec tensorSpec; + + if (tensorAttribute == tim::vx::INPUT) + { + CV_Assert(!host.empty()); + tensorShape = getShapeTypeFromMat(host); + } + else if (tensorAttribute == tim::vx::OUTPUT) + { + CV_Assert(!tensorShape.empty() && !host.empty()); + tensorShape = getShapeTypeFromMat(host); + } + else if (tensorAttribute == tim::vx::CONSTANT) + { + if (!host.empty()) + tensorShape = getShapeTypeFromMat(host, true); + } + else + { + if (!host.empty()) + tensorShape = getShapeTypeFromMat(host); + } + + // Tensor shape + if (tvQuant) + { + tensorSpec = tim::vx::TensorSpec(tensorType, tensorShape, tensorAttribute, *tvQuant); + } + else + { + tensorSpec = tim::vx::TensorSpec(tensorType, tensorShape, tensorAttribute); + } + + if (!host.empty() && tensorAttribute != tim::vx::INPUT && tensorAttribute != tim::vx::OUTPUT && tensorAttribute != tim::vx::TRANSIENT) + { + tensor = graph->CreateTensor(tensorSpec, (void *)(host.data)); + } + else + { + tensor = graph->CreateTensor(tensorSpec); + } + isTensor_ = true; + + // set Attribution + tensorAttr = tensorAttribute; +} + +Ptr TimVXBackendWrapper::getTensorQuantization() +{ + CV_Assert(isTensor_ && tensor); + auto quantize = tensor->GetQuantization(); + return makePtr(quantize); +} + +std::shared_ptr TimVXBackendWrapper::getTensor() +{ + CV_Assert(isTensor_); + return tensor; +} + +Mat TimVXBackendWrapper::getMat() +{ + if (host.empty()) + return {}; + return host; +} + + +bool TimVXBackendWrapper::isTensor() +{ + return isTensor_; +} + +void TimVXBackendWrapper::copyToHost() +{ + if (deviceDirty && !host.empty()) + { + copyToMat(host, tensor); + deviceDirty = false; + } +} + +void TimVXBackendWrapper::setHostDirty() +{ + hostDirty = true; +} + +void TimVXBackendWrapper::setDeviceDirty() +{ + deviceDirty = true; +} + +void TimVXBackendWrapper::copyToDevice() +{ + if (isTensor_ && hostDirty && !host.empty()) + { + copyToTensor(tensor, host); + hostDirty = false; + } +} + +// *********************** TimVXInfo ******************** +TimVXInfo::TimVXInfo() +{ + graphIndex = -1; +} + +TimVXInfo::~TimVXInfo() +{} + +int TimVXInfo::createGraph() +{ + Ptr tmpGraph = Ptr(new TimVXGraph()); + this->tvGraphList.push_back(tmpGraph); + return this->tvGraphList.size() - 1; +} + +bool TimVXInfo::findGraphIndex(const std::vector > &inputsWrapper, int& graphIndex) +{ + graphIndex = -1; + int wrapperSize = inputsWrapper.size(); + int graphSize = tvGraphList.size(); + + if (wrapperSize != 0 && graphSize == 0) + { + return true; + } + + int tensorIndex = -1; + Ptr wrapper; + Ptr tvGraph; + + for (int i = 0; i < graphSize; i++) + { + tvGraph = tvGraphList[i]; + for (int j = 0; j < wrapperSize; j++ ) + { + wrapper = inputsWrapper[j].dynamicCast(); + + if (!wrapper->isTensor()) // Skip wrapper without Tensor. + continue; + + tensorIndex = tvGraph->getTensorIndex(wrapper->getTensor()); + if (tensorIndex != -1 && wrapper->getTensorAttr() == tim::vx::TensorAttribute::TRANSIENT) + { + if (graphIndex == -1) + graphIndex = i; + else if (graphIndex != i) // if inputs of the same inputWrapper are from differen tvGraph. + { + graphIndex = -1; + return false; + } + } + } + } + return true; +} + +void TimVXInfo::setTmpGraphIndex(int graphIndex) +{ + this->graphIndex = graphIndex; +} + +int TimVXInfo::getTmpGraphIndex() +{ + int res = -1; + if (graphIndex != -1) + { + res = graphIndex; + graphIndex = -1; + } + return res; +} + +bool TimVXInfo::isConflict(int layerId, int graphIndex) +{ + if (graphConflictMap[layerId].empty()) + return false; + + std::vector::iterator it = std::find(graphConflictMap[layerId].begin(), + graphConflictMap[layerId].end(), graphIndex); + if (it != graphConflictMap[layerId].end()) + return true; + else + return false; +} + +Ptr TimVXInfo::getGraph() +{ + int index = getTmpGraphIndex(); + if (0 <= index && index < tvGraphList.size()) + return tvGraphList[index]; + else + return {}; +} + +#endif + +void forwardTimVX(std::vector >& outputs, const Ptr& node_) +{ +#ifdef HAVE_TIMVX + CV_Assert(!node_.empty()); + Ptr node = node_.dynamicCast(); + + if (node) + { + // set input + node->setInputTensor(); + + // graph Forward + if (node->isLast) + { + node->tvGraph->forward(); + } + } + else + return; + + // set ouput + Ptr outWarpper; + for (int i = 0; i < outputs.size(); i++) + { + outWarpper = outputs[i].dynamicCast(); + if (outWarpper->isTensor() && outWarpper->getTensorAttr() == tim::vx::TensorAttribute::OUTPUT) + { + outWarpper->setDeviceDirty(); + outWarpper->copyToHost(); + } + } +#endif +} + +bool haveTimVX() +{ +#ifdef HAVE_TIMVX + return true; +#else + return false; +#endif +} +} // namespace dnn +} // namespace cv \ No newline at end of file diff --git a/modules/dnn/src/op_timvx.hpp b/modules/dnn/src/op_timvx.hpp new file mode 100644 index 0000000000..d08cf64bb5 --- /dev/null +++ b/modules/dnn/src/op_timvx.hpp @@ -0,0 +1,187 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2019-2021, Shenzhen Institute of Artificial Intelligence and +// Robotics for Society, all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifndef OPENCV_DNN_OP_TIMVX_HPP +#define OPENCV_DNN_OP_TIMVX_HPP + +#include + +// TimVX head file. +#ifdef HAVE_TIMVX +#include "tim/vx/context.h" +#include "tim/vx/graph.h" +#include "tim/vx/operation.h" +#include "tim/vx/ops.h" +#include "tim/vx/tensor.h" +#endif // HAVE_TIMVX + +namespace cv +{ +namespace dnn +{ +#ifdef HAVE_TIMVX + +enum tvActivationType{ + tvActNotSupported = -1, + tvActReLU, + tvActReLU6, + tvActTanH, + tvActSwish, + tvActMish, + tvActSigmoid, + tvActELU +}; + +// Data copied from/to Mat to/from Tensor. Change the shape of dst if +// needed to make it the same shape as src. +bool copyToTensor(Ptr &dst, const Mat &src); +bool copyToMat(const Mat &dst, Ptr &src); +tvActivationType getTimVXActType(String & actString); + +// Convert Mat shape to TimVX TensorShape +tim::vx::ShapeType getShapeTypeFromMat(const Mat& mat, bool ifConst = false); + +// if all value in weight +bool getQuantType(const std::vector& scales, int numOutput = -1); + +class TimVXInfo; +class TimVXGraph; +class TimVXBackendNode; +class TimVXBackendWrapper; + +// Maintain the tvGraph and tvTensor List. For now, every tvGraph only have one output node, and each node +// in tvGraph has only one output too. It could be optimized in future. +// TODO: tvGraph supports multiple output node. +class TimVXGraph +{ +public: + TimVXGraph(); + ~TimVXGraph(); + std::shared_ptr getOp(const int opIndex); + + // It will add tensorWrapper to wrapperList, and return index. + // And add tensor Ptr to tensorList. + int addWrapper(Ptr& tensorWrapper); + + void forward(); + + // Add new op to opList, and return the index. + int addOp(const std::shared_ptr& op); + + // If tensor existed in tensorList, return the tensorIndex, otherwise return -1. + int getTensorIndex(const std::shared_ptr& tensor); + + Ptr getWrapper(int wrapperIndex); + + std::shared_ptr graph; + bool isCompiled; // Every tvGraph can only be compiled once. + +private: + std::shared_ptr context; + std::vector inputWrappersIndex; + std::vector outputWrappersIndex; + std::vector > wrapperList; + std::vector > tensorList; + std::vector > opList; +}; + +class TimVXBackendNode : public BackendNode +{ +public: + TimVXBackendNode(const Ptr& tvGraph); + TimVXBackendNode(const Ptr& tvGraph, const std::shared_ptr& op); + TimVXBackendNode(const Ptr& tvGraph, std::shared_ptr& op, + std::vector& inputsIndex, std::vector& outpusIndex); + + void setInputTensor(); + bool opBinding(); + + // flag for marking OutputNode of tvGraph this node is the last node in this TimVX Graph. + bool isLast; + int opIndex; + + // index of tensor and wrapper. + std::vector inputIndexList; + std::vector outputIndexList; + Ptr tvGraph; +}; + +class TimVXBackendWrapper : public BackendWrapper +{ +public: + TimVXBackendWrapper(); + TimVXBackendWrapper(Mat& m); + TimVXBackendWrapper(const Ptr& baseBuffer, Mat& m); + TimVXBackendWrapper(std::shared_ptr& tensor); + + // Create Output Tensor + void createTensor(std::shared_ptr& graph, tim::vx::TensorAttribute tensorAttribute); + void createTensor(std::shared_ptr& graph, tim::vx::TensorAttribute tensorAttribute, + Ptr& tvQuant); + std::shared_ptr getTensor(); + Mat getMat(); + + // The Output tensor in TimVX doesn't have HostMat, The shape can only be given. + void setTensorShape(const tim::vx::ShapeType & matShape); + int getTensorIndex(); + Ptr getTensorQuantization(); + tim::vx::TensorAttribute getTensorAttr(); + bool isTensor(); + + // Data Copy, CPU <==> NPU + virtual void copyToHost() CV_OVERRIDE; + virtual void setHostDirty() CV_OVERRIDE; + void setDeviceDirty(); + void copyToDevice(); + +private: + tim::vx::DataType tensorType; + bool deviceDirty; + bool hostDirty; + int tensorIndex; // index of tensorList in specific TimVXGraph. + bool isTensor_; + Mat host; + + tim::vx::ShapeType tensorShape; + std::shared_ptr tensor; + tim::vx::TensorAttribute tensorAttr; +}; + +// Contain all created tvGraphList, used in every +class TimVXInfo{ +public: + TimVXInfo(); + ~TimVXInfo(); + + // Find the right graph Index set as graphIndex, if cannot find, return empty ptr. + Ptr getGraph(); + bool findGraphIndex(const std::vector > &inputsWrapper, int& graphIndex); + void setTmpGraphIndex(int graphIndex); + bool isConflict(int layerId, int graphIndex); + + // create a TimVXGraph, add it to tvGraphList, and return the index in tvGraphList. + int createGraph(); + + // graphConflictIndex[layerIndex] saves conflict graph index, which should be excluded + std::vector > graphConflictMap; + +private: + int getTmpGraphIndex(); + std::vector > tvGraphList; + int graphIndex; + +}; + +#endif + +void forwardTimVX(std::vector > &outputs, const Ptr& node); +bool haveTimVX(); +} // namespace dnn +} // namespace cv + +#endif // OPENCV_DNN_OP_TIMVX_HPP \ No newline at end of file diff --git a/modules/dnn/src/op_vkcom.cpp b/modules/dnn/src/op_vkcom.cpp index a252104240..7249c27ce2 100644 --- a/modules/dnn/src/op_vkcom.cpp +++ b/modules/dnn/src/op_vkcom.cpp @@ -8,12 +8,51 @@ #include "precomp.hpp" #include #include "op_vkcom.hpp" +#include "net_impl.hpp" namespace cv { namespace dnn { #ifdef HAVE_VULKAN + +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::initVkComBackend() +{ + CV_TRACE_FUNCTION(); + CV_Assert(preferableBackend == DNN_BACKEND_VKCOM); + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) + { + LayerData &ld = it->second; + Ptr layer = ld.layerInstance; + if (!layer->supportBackend(preferableBackend)) + { + continue; + } + + ld.skip = false; + + try + { + ld.backendNodes[DNN_BACKEND_VKCOM] = + layer->initVkCom(ld.inputBlobsWrappers); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what()); + ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr(); + } + } +} + +CV__DNN_INLINE_NS_END + + +/////////////////////////////////////////////////////////////////////////////// + void copyToTensor(vkcom::Tensor &dst, const Mat &src) { CV_Assert(src.isContinuous() && src.type() == CV_32F); diff --git a/modules/dnn/src/op_webnn.cpp b/modules/dnn/src/op_webnn.cpp index 4dba55bcbe..d5c1740423 100644 --- a/modules/dnn/src/op_webnn.cpp +++ b/modules/dnn/src/op_webnn.cpp @@ -2,6 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. +#include "precomp.hpp" #include #include "op_webnn.hpp" @@ -13,10 +14,281 @@ #include +#include "net_impl.hpp" + namespace cv { namespace dnn { #ifdef HAVE_WEBNN +CV__DNN_INLINE_NS_BEGIN + + +void Net::Impl::addWebnnOutputs(LayerData &ld) +{ + CV_TRACE_FUNCTION(); + + Ptr layerNet; + auto it = ld.backendNodes.find(preferableBackend); + if (it != ld.backendNodes.end()) + { + Ptr node = it->second; + if (!node.empty()) + { + Ptr webnnNode = node.dynamicCast(); + CV_Assert(!webnnNode.empty()); CV_Assert(!webnnNode->net.empty()); + layerNet = webnnNode->net; + } + } + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr webnnInpNode = inpNode.dynamicCast(); + CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); + if (layerNet != webnnInpNode->net) + { + webnnInpNode->net->addOutput(webnnInpNode->name); + webnnInpNode->net->setUnconnectedNodes(webnnInpNode); + } + } + } +} + + +void Net::Impl::initWebnnBackend(const std::vector& blobsToKeep_) +{ + CV_TRACE_FUNCTION(); + CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn()); + + Ptr net; + + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData &ld = it->second; + if (ld.id == 0) + { + CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) || + (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size())); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i]; + outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName; + wrapper->name = outputName; + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name; + wrapper->name = outputName; + } + } + } + + // Build WebNN networks from sets of layers that support this + // backend. Split a whole model on several WebNN networks if + // some of layers are not implemented. + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData &ld = it->second; + + if (ld.id == 0 && ld.skip) + continue; + + bool fused = ld.skip; + Ptr layer = ld.layerInstance; + if (!fused && !layer->supportBackend(preferableBackend)) + { + // For test use. when not using WebNN, the test case will fail + // with the following code. + CV_LOG_WARNING(NULL, "Layer " + ld.type + " name " + ld.name + " is unsupported by WebNN backend."); + + addWebnnOutputs(ld); + net = Ptr(); + layer->preferableTarget = DNN_TARGET_CPU; + + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) { + Ptr webnnNode = inpNode.dynamicCast(); + CV_Assert(!webnnNode.empty()); + webnnNode->net->setUnconnectedNodes(webnnNode); + } + } + continue; + } + ld.skip = true; // Initially skip all WebNN supported layers. + + // Create a new network if one of inputs from different WebNN graph. + std::vector> inputNodes; + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois) + if (inputNodes.size() == ld.inputBlobsId.size()) { + break; + } + LayerData &inpLd = layers[ld.inputBlobsId[i].lid]; + Ptr inpNode = inpLd.backendNodes[preferableBackend]; + if (!inpNode.empty()) + { + Ptr webnnInpNode = inpNode.dynamicCast(); + CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty()); + if (webnnInpNode->net == net && !fused) { + inputNodes.push_back(inpNode); + continue; + } + } + + if (net.empty()) { + net = Ptr(new WebnnNet()); + } + + if (!fused) { + std::vector inputNames; + std::vector inputs; + + auto curr_pos = inpLd.consumers.begin(); + auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; }; + auto cons = curr_pos; + while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) != + inpLd.consumers.end()) { + int cons_inp = cons->oid; + Ptr inpWrapper = inpLd.outputBlobsWrappers[cons_inp]. + dynamicCast(); + CV_Assert(!inpWrapper.empty()); + auto iter = std::find(inputNames.begin(), inputNames.end(), + inpWrapper->name); + if (iter == inputNames.end()) { + inputNames.push_back(inpWrapper->name); + inputs.push_back(inpLd.outputBlobs[cons_inp]); + } + curr_pos = cons + 1; + } + + auto inps = net->setInputs(inputs, inputNames); + for (auto& inp : inps) { + WebnnBackendNode* node = new WebnnBackendNode(inp); + node->net = net; + inputNodes.emplace_back(Ptr(node)); + } + } + } + + Ptr node; + if (!net.empty()) + { + if (fused) + { + bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 && + ld.inputBlobs[0]->data == ld.outputBlobs[0].data; + CV_Assert(inPlace); + node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend]; + ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers; + } + } + else { + net = Ptr(new WebnnNet()); + } + + if (!fused) + { + CV_Assert(ld.inputBlobsId.size() == inputNodes.size()); + for (int i = 0; i < ld.inputBlobsId.size(); ++i) + { + int lid = ld.inputBlobsId[i].lid; + int oid = ld.inputBlobsId[i].oid; + if (oid == 0 || lid == 0) + continue; + + auto webnnInpNode = inputNodes[i].dynamicCast(); + inputNodes[i] = Ptr(new WebnnBackendNode(webnnInpNode->operand)); + } + + if (layer->supportBackend(preferableBackend)) + { + if (ld.type == "Const") { + ml::Operand fake_operand; + Ptr fake_input_node = Ptr(new WebnnBackendNode(fake_operand)); + fake_input_node->net = net; + inputNodes.push_back(fake_input_node); + } + node = layer->initWebnn(ld.inputBlobsWrappers, inputNodes); + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + Ptr wrapper = ld.outputBlobsWrappers[i].dynamicCast(); + node.dynamicCast()->name = wrapper->name; + } + } + else + { + continue; + } + } + else if (node.empty()) + continue; + + ld.backendNodes[preferableBackend] = node; + + Ptr webnnNode = node.dynamicCast(); + CV_Assert(!webnnNode.empty()); + webnnNode->net = net; + + if (ld.consumers.empty()) { + // TF EAST_text_detection + webnnNode->net->setUnconnectedNodes(webnnNode); + } + for (const auto& pin : blobsToKeep_) + { + if (pin.lid == ld.id) + { + webnnNode->net->addOutput(webnnNode->name); + break; + } + } + net->addBlobs(ld.inputBlobsWrappers); + net->addBlobs(ld.outputBlobsWrappers); + addWebnnOutputs(ld); + } + + // Initialize all networks. + for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) + { + LayerData &ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr webnnNode = node.dynamicCast(); + if (webnnNode.empty()) + continue; + + CV_Assert(!webnnNode->net.empty()); + + if (!webnnNode->net->isInitialized()) + { + webnnNode->net->setUnconnectedNodes(webnnNode); + webnnNode->net->createNet((Target)preferableTarget); + ld.skip = false; + } + } +} + + +CV__DNN_INLINE_NS_END + + namespace webnn { ml::Operand BuildConstant(const ml::GraphBuilder& builder, const std::vector& dimensions, diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index 3e99f18570..0624f48e19 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -306,3 +306,26 @@ __kernel void ThresholdedReluForward(const int n, __global T* in, __global T* ou if(index < n) out[index] = (in[index] > alpha ? in[index] : 0.f); } + +__kernel void ShrinkForward(const int n, __global T* in, __global T* out, + const KERNEL_ARG_DTYPE bias, + const KERNEL_ARG_DTYPE lambd) +{ + int index = get_global_id(0); + if(index < n) + out[index] = in[index] < -lambd ? in[index] + bias : (in[index] > lambd ? in[index] - bias : 0.f); +} + +__kernel void SignForward(const int n, __global T* in, __global T* out) +{ + int index = get_global_id(0); + if(index < n) + out[index] = in[index] > 0.f ? 1.0f : ((in[index] < 0.f) ? -1.0f : 0.0f); +} + +__kernel void ReciprocalForward(const int n, __global T* in, __global T* out) +{ + int index = get_global_id(0); + if(index < n) + out[index] = 1.0f/in[index]; +} \ No newline at end of file diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index 6ee693dd6b..abcd3745f9 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -66,6 +66,15 @@ #undef HAVE_CUDA #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include #include diff --git a/modules/dnn/src/registry.cpp b/modules/dnn/src/registry.cpp new file mode 100644 index 0000000000..697fca6015 --- /dev/null +++ b/modules/dnn/src/registry.cpp @@ -0,0 +1,152 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include "op_halide.hpp" +#include "op_inf_engine.hpp" +#include "ie_ngraph.hpp" +#include "op_vkcom.hpp" +#include "op_cuda.hpp" +#include "op_webnn.hpp" +#include "op_timvx.hpp" + +#include "halide_scheduler.hpp" + + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + + +class BackendRegistry +{ +public: + typedef std::vector< std::pair > BackendsList; + const BackendsList & getBackends() const { return backends; } + static BackendRegistry & getRegistry() + { + static BackendRegistry impl; + return impl; + } + + +private: + BackendRegistry() + { +#ifdef HAVE_HALIDE + backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU)); +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL()) + backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL)); +#endif +#endif // HAVE_HALIDE + +#ifdef HAVE_INF_ENGINE + if (openvino::checkTarget(DNN_TARGET_CPU)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_MYRIAD)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_HDDL)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL)); +#endif + } +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel()) + { + if (openvino::checkTarget(DNN_TARGET_OPENCL)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL)); +#endif + } + if (openvino::checkTarget(DNN_TARGET_OPENCL_FP16)) + { +#ifdef HAVE_DNN_NGRAPH + backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16)); +#endif + } + } +#endif +#endif // HAVE_INF_ENGINE + +#ifdef HAVE_WEBNN + if (haveWebnn()) + { + backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU)); + } +#endif // HAVE_WEBNN + +#ifdef HAVE_OPENCL + if (cv::ocl::useOpenCL()) + { + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)); + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16)); + } +#endif + + backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); + +#ifdef HAVE_VULKAN + if (haveVulkan()) + backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN)); +#endif + +#ifdef HAVE_CUDA + if (haveCUDA()) + { + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); + } +#endif + +#ifdef HAVE_TIMVX + if (haveTimVX()) + { + backends.push_back(std::make_pair(DNN_BACKEND_TIMVX, DNN_TARGET_NPU)); + } +#endif + } + + BackendsList backends; +}; + + +std::vector> getAvailableBackends() +{ + return BackendRegistry::getRegistry().getBackends(); +} + +std::vector getAvailableTargets(Backend be) +{ + if (be == DNN_BACKEND_DEFAULT) + be = (Backend)getParam_DNN_BACKEND_DEFAULT(); +#ifdef HAVE_INF_ENGINE + if (be == DNN_BACKEND_INFERENCE_ENGINE) + be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; +#endif + + std::vector result; + const BackendRegistry::BackendsList all_backends = getAvailableBackends(); + for (BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i) + { + if (i->first == be) + result.push_back(i->second); + } + return result; +} + + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp index ecb5c62f56..daadc32ad2 100644 --- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp +++ b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp @@ -331,7 +331,7 @@ teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int g teg_weight = kernel_; } - /* initial the resoruce of tengine */ + /* initial the resource of tengine */ if(false == tengine_init_flag) { init_tengine(); diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index 4c856592ab..1275088ae0 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -1035,7 +1035,7 @@ void sortByExecutionOrder(tensorflow::GraphDef& net) nodesToAdd.pop_back(); permIds.push_back(nodeToAdd); - + CV_Assert(nodeToAdd < edges.size()); for (int i = 0; i < edges[nodeToAdd].size(); ++i) { int consumerId = edges[nodeToAdd][i]; diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 763abf3b4d..b6d6a73013 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1681,10 +1681,8 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow:: int end_mask = getLayerAttr(layer, "end_mask").i(); for (int i = 0; i < num; ++i) { - if (ends.at(i) < 0) - ends.at(i) -= 1; if (end_mask & (1 << i)) - ends.at(i) = -1; + ends.at(i) = INT_MAX; if (strides.at(i) != 1) CV_Error(Error::StsNotImplemented, format("StridedSlice with stride %d", strides.at(i))); @@ -1982,15 +1980,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso int64_t pads[8]; bool explicit_pads = getExplicitPadding(layerParams, layer, pads); int64_t begs[4] = {}; - int64_t ends[4] = {-1, -1, -1, -1}; + int64_t ends[4] = {}; if (explicit_pads) { name += "/deconv"; layerParams.set("pad_mode", "VALID"); + ends[0] = ends[1] = INT_MAX; for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d] { begs[i] = pads[2*i]; - ends[i] = -1 - pads[2*i + 1]; + ends[i] = -pads[2*i + 1]; } } @@ -2010,8 +2009,8 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso const int strideX = layerParams.get("stride_w"); Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW); - const int outH = outShape.at(1 + shift) + begs[2] - 1 - ends[2]; - const int outW = outShape.at(2 + shift) + begs[3] - 1 - ends[3]; + const int outH = outShape.at(1 + shift) + begs[2] - ends[2]; + const int outW = outShape.at(2 + shift) + begs[3] - ends[3]; if (layerParams.get("pad_mode") == "SAME") { layerParams.set("adj_w", (outW - 1) % strideX); diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp index 57a624d541..3a46c8f7c0 100644 --- a/modules/dnn/src/torch/torch_importer.cpp +++ b/modules/dnn/src/torch/torch_importer.cpp @@ -954,7 +954,7 @@ struct TorchImporter int size = scalarParams.get("size"); int begins[] = {0, 0, size, size}; - int ends[] = {-1, -1, -size - 1, -size - 1}; + int ends[] = {INT_MAX, INT_MAX, -size, -size}; newModule->apiType = "Slice"; layerParams.set("begin", DictValue::arrayInt(&begins[0], 4)); diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 2b0c75e253..f2c6f1e5a0 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -214,17 +214,31 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) { if (backend == DNN_BACKEND_HALIDE) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); -#if defined(INF_ENGINE_RELEASE) - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && - target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // May hang on some configurations + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE exception: Ngraph operation Transpose with name conv15_2_mbox_conf_perm has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif Mat sample = imread(findDataFile("dnn/street.png")); @@ -467,8 +481,8 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) float scoreDiff = 0.0, iouDiff = 0.0; if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) { - scoreDiff = 0.015; - iouDiff = 0.0731; + scoreDiff = 0.02; + iouDiff = 0.1; } else if (target == DNN_TARGET_CUDA_FP16) { @@ -502,7 +516,7 @@ TEST_P(DNNTestNetwork, DenseNet_121) else if (target == DNN_TARGET_CUDA_FP16) { l1 = 0.008; - lInf = 0.05; + lInf = 0.06; } processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf); if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) @@ -542,6 +556,20 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) l1 = 0.3; lInf = 7.6; } + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = 5e-3; + lInf = 5e-3; + } + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + lInf = 25; + } +#endif + + processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000) expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index bc50c804f2..b747b47660 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -557,7 +557,7 @@ TEST_P(Test_Caffe_nets, DenseNet_121) if (target == DNN_TARGET_OPENCL_FP16) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019020000) - l1 = 0.045; lInf = 0.21; + l1 = 0.05; lInf = 0.3; #else l1 = 0.017; lInf = 0.0795; #endif @@ -633,6 +633,9 @@ TEST_P(opencv_face_detector, Accuracy) std::string model = findDataFile(get<0>(GetParam()), false); dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam()); + if (targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("gpu/lbpcascade/er.png")); Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); @@ -660,6 +663,9 @@ TEST_P(opencv_face_detector, issue_15106) std::string model = findDataFile(get<0>(GetParam()), false); dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam()); + if (targetId == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + Net net = readNetFromCaffe(proto, model); Mat img = imread(findDataFile("cv/shared/lena.png")); img = img.rowRange(img.rows / 4, 3 * img.rows / 4).colRange(img.cols / 4, 3 * img.cols / 4); @@ -673,13 +679,13 @@ TEST_P(opencv_face_detector, issue_15106) // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom] Mat out = net.forward(); Mat ref = (Mat_(1, 7) << 0, 1, 0.9149431, 0.30424616, 0.26964942, 0.88733053, 0.99815309); - normAssertDetections(ref, out, "", 0.2, 6e-5, 1e-4); + normAssertDetections(ref, out, "", 0.89, 6e-5, 1e-4); } INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, Combine( Values("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector_fp16.caffemodel"), - Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL) + testing::ValuesIn(getAvailableTargets(DNN_BACKEND_OPENCV)) ) ); @@ -719,6 +725,18 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84} + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); @@ -742,6 +760,13 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); #endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -763,17 +788,7 @@ TEST_P(Test_Caffe_nets, RFCN) CV_TEST_TAG_LONG, CV_TEST_TAG_DEBUG_VERYLONG ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + float scoreDiff = default_l1, iouDiff = default_lInf; if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) { @@ -785,6 +800,41 @@ TEST_P(Test_Caffe_nets, RFCN) scoreDiff = 0.0034; iouDiff = 0.12; } + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Sporadic: "Cannot get memory!" + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + scoreDiff = 0.1f; + iouDiff = 0.2f; + } + + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); +#endif + static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref, scoreDiff, iouDiff); diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index fb64697574..3d616e17da 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -48,6 +48,7 @@ #define CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE "dnn_skip_onnx_conformance" #define CV_TEST_TAG_DNN_SKIP_PARSER "dnn_skip_parser" +#define CV_TEST_TAG_DNN_SKIP_TIMVX "dnn_skip_timvx" #ifdef HAVE_INF_ENGINE #if INF_ENGINE_VER_MAJOR_EQ(2018050000) diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index 747dc02ce6..35f658cc90 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -30,6 +30,7 @@ void PrintTo(const cv::dnn::Backend& v, std::ostream* os) case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: *os << "DLIE"; return; case DNN_BACKEND_INFERENCE_ENGINE_NGRAPH: *os << "NGRAPH"; return; case DNN_BACKEND_WEBNN: *os << "WEBNN"; return; + case DNN_BACKEND_TIMVX: *os << "TIMVX"; return; } // don't use "default:" to emit compiler warnings *os << "DNN_BACKEND_UNKNOWN(" << (int)v << ")"; } @@ -46,6 +47,7 @@ void PrintTo(const cv::dnn::Target& v, std::ostream* os) case DNN_TARGET_FPGA: *os << "FPGA"; return; case DNN_TARGET_CUDA: *os << "CUDA"; return; case DNN_TARGET_CUDA_FP16: *os << "CUDA_FP16"; return; + case DNN_TARGET_NPU: *os << "NPU"; return; } // don't use "default:" to emit compiler warnings *os << "DNN_TARGET_UNKNOWN(" << (int)v << ")"; } @@ -478,6 +480,11 @@ void initDNNTests() registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_CUDA, CV_TEST_TAG_DNN_SKIP_CUDA_FP32, CV_TEST_TAG_DNN_SKIP_CUDA_FP16 ); +#endif +#ifdef HAVE_TIMVX + registerGlobalSkipTag( + CV_TEST_TAG_DNN_SKIP_TIMVX + ); #endif registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE, diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 3853e1ca72..75942b4f10 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -139,7 +139,14 @@ public: inp.copyTo(inp2(ranges1)); net2.setInput(inp2); Mat out2 = net2.forward(); - EXPECT_EQ(0, cv::norm(out2(ranges0), out2(ranges1), NORM_INF)) << "Batch result is not equal: " << name; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + EXPECT_LT(cv::norm(out2(ranges0), out2(ranges1), NORM_INF), 1e-4) << "Batch result is not similar: " << name; + } + else + { + EXPECT_EQ(0, cv::norm(out2(ranges0), out2(ranges1), NORM_INF)) << "Batch result is not equal: " << name; + } Mat ref2 = ref; if (ref.dims == 2 && out2.dims == 3) @@ -328,12 +335,10 @@ TEST_P(Test_Darknet_nets, YoloVoc) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); // need to update check function @@ -359,12 +364,28 @@ TEST_P(Test_Darknet_nets, YoloVoc) scoreDiff = 0.03; iouDiff = 0.018; } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = std::numeric_limits::quiet_NaN(); } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } #endif std::string config_file = "yolo-voc.cfg"; @@ -375,12 +396,25 @@ TEST_P(Test_Darknet_nets, YoloVoc) testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); } +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Exception: input != output + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: input != output + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif { SCOPED_TRACE("batch size 2"); testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, 0.24, nmsThreshold); } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -457,14 +491,15 @@ TEST_P(Test_Darknet_nets_async, Accuracy) if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) throw SkipTestException("No support for async forward"); -#if defined(INF_ENGINE_RELEASE) -#if INF_ENGINE_VER_MAJOR_GE(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov3") // NC_OUT_OF_MEMORY - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#else + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov3") // NC_OUT_OF_MEMORY + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") // NC_OUT_OF_MEMORY applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif #endif const int numInputs = 2; @@ -519,6 +554,21 @@ TEST_P(Test_Darknet_nets_async, Accuracy) l1 = 0.001; lInf = 0.005; } + if (INF_ENGINE_VER_MAJOR_EQ(2021040000) && targetId == DNN_TARGET_OPENCL_FP16 && prefix == "yolov4-tiny") // FIXIT: 4.x only, 3.4 branch works well + { + l1 = 0.001; + lInf = 0.005; + } + if (INF_ENGINE_VER_MAJOR_EQ(2022010000) && targetId == DNN_TARGET_OPENCL_FP16 && prefix == "yolov4-tiny") // FIXIT: 4.x only, 3.4 branch works well + { + l1 = 0.001; + lInf = 0.005; + } + if (targetId == DNN_TARGET_MYRIAD && prefix == "yolov4") + { + l1 = 0.005; + lInf = 1.6f; // |ref| = 0.95431125164031982 + } } #endif @@ -631,7 +681,7 @@ TEST_P(Test_Darknet_nets, YOLOv4) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if (target == DNN_TARGET_MYRIAD) // NC_OUT_OF_MEMORY applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -666,14 +716,24 @@ TEST_P(Test_Darknet_nets, YOLOv4) std::string weights_file = "yolov4.weights"; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (batch 1): no detections + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy (batch 1) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = std::numeric_limits::quiet_NaN(); } -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) @@ -691,14 +751,24 @@ TEST_P(Test_Darknet_nets, YOLOv4) { SCOPED_TRACE("batch size 2"); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // accuracy (batch 1) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (batch 2) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + { + iouDiff = 0.05f; + } + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + { + iouDiff = std::numeric_limits::quiet_NaN(); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy (batch 2) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) { iouDiff = 0.45f; } -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_OPENCL) @@ -714,7 +784,14 @@ TEST_P(Test_Darknet_nets, YOLOv4) testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -876,8 +953,27 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); TEST_P(Test_Darknet_layers, shortcut) { testDarknetLayer("shortcut"); +} +TEST_P(Test_Darknet_layers, shortcut_leaky) +{ testDarknetLayer("shortcut_leaky"); +} +TEST_P(Test_Darknet_layers, shortcut_unequal) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testDarknetLayer("shortcut_unequal"); +} +TEST_P(Test_Darknet_layers, shortcut_unequal_2) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testDarknetLayer("shortcut_unequal_2"); } @@ -912,7 +1008,19 @@ TEST_P(Test_Darknet_layers, region) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy on CPU, OpenCL + // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05 + // |ref| = 1.207319974899292 + // Expected: (normInf) <= (lInf), actual: 0.763223 vs 0.0001 + // |ref| = 1.207319974899292 + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // accuracy on CPU, OpenCL // Expected: (normInf) <= (lInf), actual: 0.763223 vs 0.0001 // |ref| = 1.207319974899292 @@ -949,10 +1057,12 @@ TEST_P(Test_Darknet_layers, maxpool) TEST_P(Test_Darknet_layers, convolutional) { +#if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) { default_l1 = 0.01f; } +#endif testDarknetLayer("convolutional", true); } diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 17c6b48101..104d7e7347 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -170,6 +170,23 @@ TEST_P(Deconvolution, Accuracy) Backend backendId = get<0>(get<7>(GetParam())); Target targetId = get<1>(get<7>(GetParam())); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + && inChannels == 6 && outChannels == 4 && group == 1 + && kernel == Size(3, 1) && pad == Size(0, 1) + && stride == Size(1, 1) && dilation == Size(1, 1)) + applyTestTag(targetId == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) + && inChannels == 6 && outChannels == 4 && group == 1 + && kernel == Size(1, 3) && pad == Size(1, 0) + && stride == Size(1, 1) && dilation == Size(1, 1)) + applyTestTag(targetId == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X @@ -624,9 +641,16 @@ TEST_P(NoParamActivation, Accuracy) { Backend backendId = get<0>(get<1>(GetParam())); Target targetId = get<1>(get<1>(GetParam())); + std::string layer_type = get<0>(GetParam()); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_CPU && layer_type == "BNLL") + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif LayerParams lp; - lp.type = get<0>(GetParam()); + lp.type = layer_type; lp.name = "testLayer"; testInPlaceActivation(lp, backendId, targetId); } diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 0fe19db5e9..3622f69bdb 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -290,7 +290,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath if (cvtest::debugLevel > 0) { const std::vector& dims = desc.getDims(); - std::cout << "Input: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + std::cout << "Input: '" << it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " ["; for (auto d : dims) std::cout << " " << d; std::cout << "] ocv_mat=" << inputsMap[it.first].size << " of " << typeToString(inputsMap[it.first].type()) << std::endl; @@ -308,7 +308,7 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath if (cvtest::debugLevel > 0) { const std::vector& dims = desc.getDims(); - std::cout << "Output: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + std::cout << "Output: '" << it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " ["; for (auto d : dims) std::cout << " " << d; std::cout << "] ocv_mat=" << outputsMap[it.first].size << " of " << typeToString(outputsMap[it.first].type()) << std::endl; diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 6c41a8dbbb..f2f00b08c2 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -12,6 +12,9 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget { std::vector< tuple > targets; targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)); +#ifdef HAVE_TIMVX + targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU)); +#endif return testing::ValuesIn(targets); } @@ -104,14 +107,29 @@ TEST_P(Test_Int8_layers, Convolution1D) TEST_P(Test_Int8_layers, Convolution2D) { - testLayer("layer_convolution", "Caffe", 0.0174, 0.0758, 1, 1, true); - testLayer("single_conv", "TensorFlow", 0.00413, 0.02201); - testLayer("depthwise_conv2d", "TensorFlow", 0.0388, 0.169); + if(backend == DNN_BACKEND_TIMVX) + testLayer("single_conv", "TensorFlow", 0.00424, 0.02201); + else + testLayer("single_conv", "TensorFlow", 0.00413, 0.02201); + testLayer("atrous_conv2d_valid", "TensorFlow", 0.0193, 0.0633); testLayer("atrous_conv2d_same", "TensorFlow", 0.0185, 0.1322); testLayer("keras_atrous_conv2d_same", "TensorFlow", 0.0056, 0.0244); - testLayer("convolution", "ONNX", 0.0052, 0.01516); - testLayer("two_convolution", "ONNX", 0.00295, 0.00840); + + if(backend == DNN_BACKEND_TIMVX) + testLayer("convolution", "ONNX", 0.00534, 0.01516); + else + testLayer("convolution", "ONNX", 0.0052, 0.01516); + + if(backend == DNN_BACKEND_TIMVX) + testLayer("two_convolution", "ONNX", 0.0033, 0.01); + else + testLayer("two_convolution", "ONNX", 0.00295, 0.00840); + + if(backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); + testLayer("layer_convolution", "Caffe", 0.0174, 0.0758, 1, 1, true); + testLayer("depthwise_conv2d", "TensorFlow", 0.0388, 0.169); } TEST_P(Test_Int8_layers, Convolution3D) @@ -130,9 +148,21 @@ TEST_P(Test_Int8_layers, Flatten) TEST_P(Test_Int8_layers, Padding) { - testLayer("padding_valid", "TensorFlow", 0.0026, 0.0064); - testLayer("padding_same", "TensorFlow", 0.0081, 0.032); - testLayer("spatial_padding", "TensorFlow", 0.0078, 0.028); + if (backend == DNN_BACKEND_TIMVX) + testLayer("padding_valid", "TensorFlow", 0.0292, 0.0105); + else + testLayer("padding_valid", "TensorFlow", 0.0026, 0.0064); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("padding_same", "TensorFlow", 0.0085, 0.032); + else + testLayer("padding_same", "TensorFlow", 0.0081, 0.032); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("spatial_padding", "TensorFlow", 0.0079, 0.028); + else + testLayer("spatial_padding", "TensorFlow", 0.0078, 0.028); + testLayer("mirror_pad", "TensorFlow", 0.0064, 0.013); testLayer("pad_and_concat", "TensorFlow", 0.0021, 0.0098); testLayer("padding", "ONNX", 0.0005, 0.0069); @@ -283,20 +313,35 @@ TEST_P(Test_Int8_layers, InnerProduct) { testLayer("layer_inner_product", "Caffe", 0.005, 0.02, 1, 1, true); testLayer("matmul", "TensorFlow", 0.0061, 0.019); - testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0009, 0.0091); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0018, 0.0175); + else + testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0009, 0.0091); + testLayer("nhwc_reshape_matmul", "TensorFlow", 0.03, 0.071); testLayer("matmul_layout", "TensorFlow", 0.035, 0.06); testLayer("tf2_dense", "TensorFlow", 0, 0); testLayer("matmul_add", "ONNX", 0.041, 0.082); testLayer("linear", "ONNX", 0.0018, 0.0029); - testLayer("constant", "ONNX", 0.00021, 0.0006); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("constant", "ONNX", 0.00048, 0.0013); + else + testLayer("constant", "ONNX", 0.00021, 0.0006); + testLayer("lin_with_constant", "ONNX", 0.0011, 0.0016); } TEST_P(Test_Int8_layers, Reshape) { testLayer("reshape_layer", "TensorFlow", 0.0032, 0.0082); - testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("reshape_nchw", "TensorFlow", 0.0092, 0.0495); + else + testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029); + testLayer("reshape_conv", "TensorFlow", 0.035, 0.054); testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078); testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028); @@ -307,7 +352,12 @@ TEST_P(Test_Int8_layers, Reshape) testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081); testLayer("squeeze", "ONNX", 0.0048, 0.0081); testLayer("unsqueeze", "ONNX", 0.0033, 0.0053); - testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.0054, 0.0154); + + if (backend == DNN_BACKEND_TIMVX) + testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.006, 0.0212); + else + testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.0054, 0.0154); + testLayer("unsqueeze_and_conv_dynamic_axes", "ONNX", 0.0037, 0.0151); } @@ -378,6 +428,10 @@ TEST_P(Test_Int8_layers, Dropout) TEST_P(Test_Int8_layers, Eltwise) { testLayer("layer_eltwise", "Caffe", 0.062, 0.15); + + if (backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); + testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false); testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047); testLayer("eltwise_add_vec", "TensorFlow", 0.037, 0.21); // tflite 0.0095, 0.0365 @@ -852,7 +906,7 @@ TEST_P(Test_Int8_nets, opencv_face_detector) 0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494, 0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801); - float confThreshold = 0.5, scoreDiff = 0.002, iouDiff = 0.21; + float confThreshold = 0.5, scoreDiff = 0.002, iouDiff = 0.4; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } @@ -862,6 +916,8 @@ TEST_P(Test_Int8_nets, EfficientDet) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + if (backend == DNN_BACKEND_TIMVX) + applyTestTag(CV_TEST_TAG_DNN_SKIP_TIMVX); if (target != DNN_TARGET_CPU) { diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 3f4a437637..14a19266cc 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -1485,6 +1485,12 @@ static void test_dldt_fused_output(Backend backend, Target target) TEST_P(Test_DLDT_layers, fused_output) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + CV_DNN_REGISTER_LAYER_CLASS(Unsupported, UnsupportedLayer); try { @@ -1644,7 +1650,16 @@ TEST_P(Test_Caffe_layers, Interp) TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (available in OpenCV source tree only) #endif { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception #endif @@ -2082,7 +2097,7 @@ TEST_P(Layer_Test_Slice, variable_input_shape) int targetId = get<1>(GetParam()); int begin[] = {0, 0, 0, 0}; - int end[] = {-1, -1, -1, -1}; + int end[] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX}; Net net; LayerParams lp; diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 108131bd9d..0fab7551a5 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -844,8 +844,9 @@ TEST_P(Test_two_inputs, basic) Mat ref; addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F); - double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6; - double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.3 : 1e-5; + double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.06 : 1e-6; + double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD || targetId == DNN_TARGET_CUDA_FP16) ? 0.3 : 1e-5; + normAssert(out, ref, "", l1, lInf); if (cvtest::debugLevel > 0 || HasFailure()) diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 4f5922182a..25d1a18d52 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -110,6 +110,9 @@ public: model.setInputSize(size).setInputMean(mean).setInputScale(scale) .setInputSwapRB(swapRB).setInputCrop(crop); + model.setPreferableBackend(backend); + model.setPreferableTarget(target); + model.segment(frame, mask); normAssert(mask, exp, "", norm, norm); } @@ -287,20 +290,20 @@ TEST_P(Test_Model, DetectRegion) CV_TEST_TAG_MEMORY_2GB ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -347,20 +350,20 @@ TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) CV_TEST_TAG_MEMORY_2GB ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); #endif @@ -403,13 +406,28 @@ TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) TEST_P(Test_Model, DetectionOutput) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: + // While validating node 'v1::Reshape bbox_pred_reshape (ave_bbox_pred_rois[0]:f32{1,8,1,1}, Constant_388[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': + // Requested output shape {1,300,8,1} is incompatible with input shape {1, 8, 1, 1} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // Exception: Function contains several inputs and outputs with one friendly name! (HETERO bug?) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) // FIXIT DNN_BACKEND_INFERENCE_ENGINE is misused if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); @@ -494,7 +512,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.002; + scoreDiff = 0.0021; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -613,13 +631,48 @@ TEST_P(Test_Model, Segmentation) CV_TEST_TAG_MEMORY_2GB ); + float norm = 0; + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + { + norm = 25.0f; // depends on OS/OpenCL version + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // cnn_network_ngraph_impl.cpp:104 Function contains several inputs and outputs with one friendly name: 'upscore2'! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) + // Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + || (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)) + { + norm = 2.0f; // l1 = 0.01 lInf = 2 + } + std::string inp = _tf("dog416.png"); std::string weights_file = _tf("fcn8s-heavy-pascal.prototxt"); std::string config_file = _tf("fcn8s-heavy-pascal.caffemodel", false); std::string exp = _tf("segmentation_exp.png"); Size size{128, 128}; - float norm = 0; double scale = 1.0; Scalar mean = Scalar(); bool swapRB = false; @@ -629,7 +682,16 @@ TEST_P(Test_Model, Segmentation) TEST_P(Test_Model, TextRecognition) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // FIXIT: dnn/src/ie_ngraph.cpp:494: error: (-215:Assertion failed) !inps.empty() in function 'createNet' + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Node Transpose_79 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, @@ -653,7 +715,13 @@ TEST_P(Test_Model, TextRecognition) TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Node Transpose_79 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name 71 has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp index 0e912ede54..e9bc0e4187 100644 --- a/modules/dnn/test/test_onnx_conformance.cpp +++ b/modules/dnn/test/test_onnx_conformance.cpp @@ -954,7 +954,7 @@ public: if (target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) { - default_l1 = 4e-3; + default_l1 = 7e-3; default_lInf = 2e-2; } else diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp index 0f5f387132..c18ced0c59 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__cuda_denylist.inl.hpp @@ -66,6 +66,15 @@ "test_maxunpool_export_with_output_shape", "test_mul_bcast", "test_mul_uint8", +"test_reduce_prod_default_axes_keepdims_example", // FP16 only +"test_reduce_prod_default_axes_keepdims_random", // FP16 only +"test_reduce_prod_do_not_keepdims_random", // FP16 only +"test_reduce_prod_keepdims_random", // FP16 only +"test_reduce_prod_negative_axes_keepdims_random", // FP16 only +"test_reduce_sum_square_default_axes_keepdims_random", // FP16 only +"test_reduce_sum_square_do_not_keepdims_random", // FP16 only +"test_reduce_sum_square_keepdims_random", // FP16 only +"test_reduce_sum_square_negative_axes_keepdims_random", // FP16 only "test_softmax_default_axis", "test_softmax_large_number", // FP16 only "test_softmax_large_number_expanded", // FP16 only diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index bdd8f3b8b9..cad914d05a 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -1267,7 +1267,10 @@ CASE(test_reduce_l1_negative_axes_keep_dims_example) CASE(test_reduce_l1_negative_axes_keep_dims_random) // no filter CASE(test_reduce_l2_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00490189 vs 0.004) +#endif CASE(test_reduce_l2_default_axes_keepdims_random) // no filter CASE(test_reduce_l2_do_not_keepdims_example) @@ -1291,7 +1294,10 @@ CASE(test_reduce_log_sum_default) CASE(test_reduce_log_sum_desc_axes) // no filter CASE(test_reduce_log_sum_exp_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00671387 vs 0.004 +#endif CASE(test_reduce_log_sum_exp_default_axes_keepdims_random) // no filter CASE(test_reduce_log_sum_exp_do_not_keepdims_example) @@ -1357,21 +1363,61 @@ CASE(test_reduce_min_negative_axes_keepdims_example) CASE(test_reduce_min_negative_axes_keepdims_random) // no filter CASE(test_reduce_prod_default_axes_keepdims_example) - // no filter +#if SKIP_SET_1 + SKIP_MYRIAD; // accuracy (Expected: (normL1) <= (l1), actual: inf vs 0.004) +#endif CASE(test_reduce_prod_default_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 5; // Expected: (normL1) <= (l1), actual: 2.66211 vs 0.004 |ref| = 24621.337890625 + default_lInf = 5; // Expected: (normInf) <= (lInf), actual: 2.66211 vs 0.02 |ref| = 24621.337890625 + } +#endif CASE(test_reduce_prod_do_not_keepdims_example) // no filter CASE(test_reduce_prod_do_not_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_prod_keepdims_example) // no filter CASE(test_reduce_prod_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_prod_negative_axes_keepdims_example) // no filter CASE(test_reduce_prod_negative_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 + } +#endif CASE(test_reduce_sum_default_axes_keepdims_example) // no filter CASE(test_reduce_sum_default_axes_keepdims_random) @@ -1395,19 +1441,61 @@ CASE(test_reduce_sum_negative_axes_keepdims_random) CASE(test_reduce_sum_square_default_axes_keepdims_example) // no filter CASE(test_reduce_sum_square_default_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.0183411 vs 0.004 +#endif CASE(test_reduce_sum_square_do_not_keepdims_example) // no filter CASE(test_reduce_sum_square_do_not_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.01f; // Expected: (normL1) <= (l1), actual: 0.00723048 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0201416 vs 0.02 + } +#endif CASE(test_reduce_sum_square_keepdims_example) // no filter CASE(test_reduce_sum_square_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reduce_sum_square_negative_axes_keepdims_example) // no filter CASE(test_reduce_sum_square_negative_axes_keepdims_random) - // no filter +#if SKIP_SET_1 + if (target == DNN_TARGET_MYRIAD) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif +#if INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (target == DNN_TARGET_OPENCL_FP16) + { + default_l1 = 0.05f; // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004 + default_lInf = 0.05f; // Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 + } +#endif CASE(test_reflect_pad) // no filter CASE(test_relu) diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp index ccd1568845..c2425d469f 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp @@ -20,3 +20,14 @@ "test_split_equal_parts_2d", "test_split_equal_parts_default_axis", "test_tan", +"test_reduce_l2_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: 0.00490189 vs 0.004 +"test_reduce_log_sum_exp_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: 0.00671387 vs 0.004 +"test_reduce_prod_default_axes_keepdims_example", // Expected: (normL1) <= (l1), actual: inf vs 0.004 +"test_reduce_prod_default_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 18.6621 vs 0.004, Expected: (normInf) <= (lInf), actual: 18.6621 vs 0.02 +"test_reduce_prod_do_not_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_prod_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_prod_negative_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.00436729 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0201836 vs 0.02 +"test_reduce_sum_square_default_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.0183411 vs 0.004 +"test_reduce_sum_square_do_not_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 +"test_reduce_sum_square_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 +"test_reduce_sum_square_negative_axes_keepdims_random", // Expected: (normL1) <= (l1), actual: 0.010789 vs 0.004, Expected: (normInf) <= (lInf), actual: 0.0290298 vs 0.02 \ No newline at end of file diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp index e5d0ead9da..49db810c7f 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp @@ -337,53 +337,6 @@ "test_range_float_type_positive_delta_expanded", "test_range_int32_type_negative_delta", "test_range_int32_type_negative_delta_expanded", -"test_reciprocal", -"test_reciprocal_example", -"test_reduce_l1_default_axes_keepdims_example", -"test_reduce_l1_default_axes_keepdims_random", -"test_reduce_l1_do_not_keepdims_example", -"test_reduce_l1_do_not_keepdims_random", -"test_reduce_l1_keep_dims_example", -"test_reduce_l1_keep_dims_random", -"test_reduce_l1_negative_axes_keep_dims_example", -"test_reduce_l1_negative_axes_keep_dims_random", -"test_reduce_l2_default_axes_keepdims_example", -"test_reduce_l2_default_axes_keepdims_random", -"test_reduce_l2_do_not_keepdims_example", -"test_reduce_l2_do_not_keepdims_random", -"test_reduce_l2_keep_dims_example", -"test_reduce_l2_keep_dims_random", -"test_reduce_l2_negative_axes_keep_dims_example", -"test_reduce_l2_negative_axes_keep_dims_random", -"test_reduce_log_sum", -"test_reduce_log_sum_asc_axes", -"test_reduce_log_sum_default", -"test_reduce_log_sum_desc_axes", -"test_reduce_log_sum_exp_default_axes_keepdims_example", -"test_reduce_log_sum_exp_default_axes_keepdims_random", -"test_reduce_log_sum_exp_do_not_keepdims_example", -"test_reduce_log_sum_exp_do_not_keepdims_random", -"test_reduce_log_sum_exp_keepdims_example", -"test_reduce_log_sum_exp_keepdims_random", -"test_reduce_log_sum_exp_negative_axes_keepdims_example", -"test_reduce_log_sum_exp_negative_axes_keepdims_random", -"test_reduce_log_sum_negative_axes", -"test_reduce_min_default_axes_keepdims_example", -"test_reduce_min_default_axes_keepdims_random", -"test_reduce_min_do_not_keepdims_example", -"test_reduce_min_do_not_keepdims_random", -"test_reduce_min_keepdims_example", -"test_reduce_min_keepdims_random", -"test_reduce_min_negative_axes_keepdims_example", -"test_reduce_min_negative_axes_keepdims_random", -"test_reduce_prod_default_axes_keepdims_example", -"test_reduce_prod_default_axes_keepdims_random", -"test_reduce_prod_do_not_keepdims_example", -"test_reduce_prod_do_not_keepdims_random", -"test_reduce_prod_keepdims_example", -"test_reduce_prod_keepdims_random", -"test_reduce_prod_negative_axes_keepdims_example", -"test_reduce_prod_negative_axes_keepdims_random", "test_reduce_sum_default_axes_keepdims_example", "test_reduce_sum_default_axes_keepdims_random", "test_reduce_sum_do_not_keepdims_example", @@ -394,14 +347,6 @@ "test_reduce_sum_keepdims_random", "test_reduce_sum_negative_axes_keepdims_example", "test_reduce_sum_negative_axes_keepdims_random", -"test_reduce_sum_square_default_axes_keepdims_example", -"test_reduce_sum_square_default_axes_keepdims_random", -"test_reduce_sum_square_do_not_keepdims_example", -"test_reduce_sum_square_do_not_keepdims_random", -"test_reduce_sum_square_keepdims_example", -"test_reduce_sum_square_keepdims_random", -"test_reduce_sum_square_negative_axes_keepdims_example", -"test_reduce_sum_square_negative_axes_keepdims_random", "test_reflect_pad", "test_reshape_allowzero_reordered", "test_reshape_extended_dims", @@ -532,9 +477,6 @@ "test_shape_start_1_end_2", "test_shape_start_1_end_negative_1", "test_shape_start_negative_1", -"test_shrink_hard", -"test_shrink_soft", -"test_sign", "test_simple_rnn_batchwise", "test_simple_rnn_defaults", "test_simple_rnn_with_initial_bias", diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 4918c72d10..8503f55c25 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -106,6 +106,12 @@ TEST_P(Test_ONNX_layers, MaxPooling) } TEST_P(Test_ONNX_layers, MaxPooling_2) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("two_maxpooling", npy, 0, 0, false, false); } @@ -148,18 +154,31 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight) TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // openvino/src/plugins/intel_myriad/common/src/ngraph/transformations/extract_dynamic_batch/slice_convolution.cpp:14 Expecting operation v1::GroupConvolution GroupConvolution_6904725 (Reshape_17[0]:f32{1,4,5,5}, Reshape_6904719[0]:f32{4,1,1,2,2}) -> (f32{1,4,4,4}) to have constant kernel, got Reshape_6904719[0]:f32{4,1,1,2,2} + // openvino\src\plugins\intel_myriad\common\src\ngraph\transformations\extract_dynamic_batch\slice_convolution.cpp:15 Expecting operation v1::GroupConvolution GroupConvolution_6904692 (Reshape_17[0]:f32{1,4,5,5}, Reshape_6904686[0]:f32{4,1,1,2,2}) -> (f32{1,4,4,4}) to have constant kernel, got Reshape_6904686[0]:f32{4,1,1,2,2} + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy (depends on OpenCL version / HW) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); - if (backend == DNN_BACKEND_CUDA) - applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported - if (backend == DNN_BACKEND_VKCOM) - applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU && getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif + + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // supports only <= 2 inputs + + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); @@ -229,7 +248,15 @@ TEST_P(Test_ONNX_layers, Deconvolution) TEST_P(Test_ONNX_layers, Deconvolution3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "2": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 2@weights Const data got different desc and content byte sizes (162 and 486 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -250,7 +277,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D) TEST_P(Test_ONNX_layers, Deconvolution3D_bias) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (270 and 810 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -271,7 +306,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D_bias) TEST_P(Test_ONNX_layers, Deconvolution3D_pad) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (108 and 432 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -292,7 +335,15 @@ TEST_P(Test_ONNX_layers, Deconvolution3D_pad) TEST_P(Test_ONNX_layers, Deconvolution3D_adjpad) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/frontend/frontend.cpp:592 Failed to compile layer "3": + // [ GENERAL_ERROR ] openvino/src/plugins/intel_myriad/graph_transformer/src/model/model.cpp:198 duplicateData error: while duplicating 3@weights Const data got different desc and content byte sizes (90 and 180 respectively) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // [ GENERAL_ERROR ] vpu/graph_transformer/src/frontend/frontend.cpp:439 Failed to compile layer "2": @@ -358,7 +409,18 @@ TEST_P(Test_ONNX_layers, ReduceSum) TEST_P(Test_ONNX_layers, ReduceMax) { testONNXModels("reduce_max"); +} +TEST_P(Test_ONNX_layers, ReduceMax_axis_0) +{ testONNXModels("reduce_max_axis_0"); +} +TEST_P(Test_ONNX_layers, ReduceMax_axis_1) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: !out.networkInputs.empty() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("reduce_max_axis_1"); } @@ -378,10 +440,40 @@ TEST_P(Test_ONNX_layers, ArgLayer) TEST_P(Test_ONNX_layers, Scale) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy (inf/nan) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: mkldnn_node.cpp:238 Ngraph operation Reshape with name ReduceMean_0 has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name ReduceMean_0 has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif testONNXModels("scale"); +} + +TEST_P(Test_ONNX_layers, Scale_broadcast) +{ + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting testONNXModels("scale_broadcast", npy, 0, 0, false, true, 3); +} + +TEST_P(Test_ONNX_layers, Scale_broadcast_mid) +{ + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // doesn't support broadcasting testONNXModels("scale_broadcast_mid", npy, 0, 0, false, true, 2); } @@ -481,7 +573,19 @@ TEST_P(Test_ONNX_layers, Elementwise_Sqrt) TEST_P(Test_ONNX_layers, Elementwise_not) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -492,145 +596,151 @@ TEST_P(Test_ONNX_layers, Elementwise_not) TEST_P(Test_ONNX_layers, Compare_EQ) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("equal"); } TEST_P(Test_ONNX_layers, Compare_GT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("greater"); } TEST_P(Test_ONNX_layers, Compare_LT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("less"); } TEST_P(Test_ONNX_layers, CompareSameDims_EQ) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("equal_same_dims", npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, CompareSameDims_GT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("greater_same_dims", npy, 0, 0, false, true, 2); } TEST_P(Test_ONNX_layers, CompareSameDims_LT) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: Function contains several inputs and outputs with one friendly name! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - { - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION - ); - // IE exception: Function contains several inputs and outputs with one friendly name! - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - } -#endif + testONNXModels("less_same_dims", npy, 0, 0, false, true, 2); } @@ -664,7 +774,19 @@ TEST_P(Test_ONNX_layers, AveragePooling) TEST_P(Test_ONNX_layers, MaxPooling3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + // accuracy + if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // IE exception: [ GENERAL_ERROR ] AssertionFailed: !expired() + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // accuracy @@ -797,8 +919,14 @@ TEST_P(Test_ONNX_layers, MatMul) TEST_P(Test_ONNX_layers, MatMulAdd) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021010000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); +#endif if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); testONNXModels("matmul_add"); @@ -990,6 +1118,17 @@ TEST_P(Test_ONNX_layers, Slice_Steps_2DInput) TEST_P(Test_ONNX_layers, Slice_Steps_3DInput) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + testONNXModels("slice_opset_11_steps_3d"); } @@ -1000,6 +1139,20 @@ TEST_P(Test_ONNX_layers, Slice_Steps_4DInput) TEST_P(Test_ONNX_layers, Slice_Steps_5DInput) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("slice_opset_11_steps_5d"); } @@ -1021,7 +1174,16 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) TEST_P(Test_ONNX_layers, LSTM_Activations) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node Block1326/lstm/reshape_0/permute was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) // IE Exception: Ngraph operation Reshape with name Block1237_Output_0_before_reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, @@ -1032,12 +1194,14 @@ TEST_P(Test_ONNX_layers, LSTM_Activations) testONNXModels("lstm_cntk_tanh", pb, 0, 0, false, false); } -TEST_P(Test_ONNX_layers, LSTM) +// disabled due to poor handling of 1-d mats +TEST_P(Test_ONNX_layers, DISABLED_LSTM) { testONNXModels("lstm", npy, 0, 0, false, false); } -TEST_P(Test_ONNX_layers, LSTM_bidirectional) +// disabled due to poor handling of 1-d mats +TEST_P(Test_ONNX_layers, DISABLED_LSTM_bidirectional) { testONNXModels("lstm_bidirectional", npy, 0, 0, false, false); } @@ -1049,11 +1213,32 @@ TEST_P(Test_ONNX_layers, LSTM_hidden) TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node Transpose_45 was not assigned on any pointed device. + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false); } TEST_P(Test_ONNX_layers, GRU) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // IE exception: Node GRU_22 was not assigned on any pointed device + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif testONNXModels("gru", npy, 0, 0, false, false); } @@ -1062,6 +1247,41 @@ TEST_P(Test_ONNX_layers, GRU_bidirectional) testONNXModels("gru_bi", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, LSTM_cell_forward) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("lstm_cell_forward", npy, 0, 0, false, false); +} +TEST_P(Test_ONNX_layers, LSTM_cell_bidirectional) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // Ngraph operation Reshape with name LSTM_16/lstm_y/reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + testONNXModels("lstm_cell_bidirectional", npy, 0, 0, false, false); +} +TEST_P(Test_ONNX_layers, LSTM_cell_with_peepholes) +{ + testONNXModels("lstm_cell_with_peepholes", npy, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, Pad2d_Unfused) { testONNXModels("ReflectionPad2d"); @@ -1248,14 +1468,19 @@ TEST_P(Test_ONNX_layers, DynamicAxes_gather) TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // accuracy if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); -#endif -#if defined(INF_ENGINE_RELEASE) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1397,7 +1622,10 @@ TEST_P(Test_ONNX_layers, MaxPool1d) TEST_P(Test_ONNX_layers, MaxPoolSigmoid1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1412,7 +1640,16 @@ TEST_P(Test_ONNX_layers, MaxPoolSigmoid1d) TEST_P(Test_ONNX_layers, MaxPool1d_Twise) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1427,7 +1664,11 @@ TEST_P(Test_ONNX_layers, MaxPool1d_Twise) TEST_P(Test_ONNX_layers, AvePool1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1442,7 +1683,19 @@ TEST_P(Test_ONNX_layers, AvePool1d) TEST_P(Test_ONNX_layers, PoolConv1d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -1643,18 +1896,21 @@ TEST_P(Test_ONNX_nets, Squeezenet) TEST_P(Test_ONNX_nets, Googlenet) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // accuracy + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // accuracy + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) - // accuracy - if (target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif const String model = _tf("models/googlenet.onnx", false); @@ -1880,7 +2136,7 @@ TEST_P(Test_ONNX_nets, Emotion_ferplus) double lInf = default_lInf; // Output values are in range [-2.011, 2.111] - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + if ((backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) || (target == DNN_TARGET_CUDA_FP16)) l1 = 0.007; else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16) { @@ -1939,19 +2195,27 @@ TEST_P(Test_ONNX_nets, Shufflenet) TEST_P(Test_ONNX_nets, Resnet34_kinetics) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported - else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: Failed to allocate graph: MYRIAD device is not opened + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // IE exception: Function contains several inputs and outputs with one friendly name! if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Only CPU on DLIE backend is supported #endif if (backend == DNN_BACKEND_OPENCV && target != DNN_TARGET_CPU) throw SkipTestException("Only CPU is supported"); // FIXIT use tags diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 7a4bfc96f2..b33d067780 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -215,7 +215,14 @@ TEST_P(Test_TensorFlow_layers, conv_keras_atrous_conv2d_same) } TEST_P(Test_TensorFlow_layers, conv_pool_nchw) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -275,13 +282,25 @@ TEST_P(Test_TensorFlow_layers, padding_asymmetric_3) TEST_P(Test_TensorFlow_layers, padding_asymmetric_4) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Unsupported pad value + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) // Exception: Unsupported pad value + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) // Exception: Unsupported pad value applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) // Exception: Unsupported pad value applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif #endif runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc"); } @@ -470,7 +489,14 @@ TEST_P(Test_TensorFlow_layers, slim_batch_norm) TEST_P(Test_TensorFlow_layers, pooling_max_pool_even) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -478,7 +504,14 @@ TEST_P(Test_TensorFlow_layers, pooling_max_pool_even) } TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_valid) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -486,7 +519,14 @@ TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_valid) } TEST_P(Test_TensorFlow_layers, pooling_max_pool_odd_same) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -605,7 +645,16 @@ TEST_P(Test_TensorFlow_layers, ave_pool_same) TEST_P(Test_TensorFlow_layers, MaxPooling3D) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // IE exception: [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // accuracy @@ -617,8 +666,7 @@ TEST_P(Test_TensorFlow_layers, MaxPooling3D) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); // Only CPU on DLIE backend is supported else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) @@ -794,14 +842,19 @@ TEST_P(Test_TensorFlow_layers, ExpandDims) // TODO: fix it and add to l2_normalize TEST_P(Test_TensorFlow_layers, l2_normalize_3d) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 - && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) - ) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif -#if defined(INF_ENGINE_RELEASE) + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); + // accuracy + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) @@ -1058,15 +1111,28 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN_resnet50_coco_2018_01_28) CV_TEST_TAG_DEBUG_VERYLONG ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) + // [ GENERAL_ERROR ] AssertionFailed: subgraphTopoSortsStep++ < subgraphs.size() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); // IE exception: Ngraph operation Transpose with name FirstStageBoxPredictor/ClassPredictor/reshape_1/nhwc has dynamic output shape on 0 port, but CPU plug-in supports only static shape if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION ); -#endif - -#ifdef INF_ENGINE_RELEASE +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU)) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -1074,12 +1140,12 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN_resnet50_coco_2018_01_28) if (INF_ENGINE_VER_MAJOR_GT(2019030000) && backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) // segfault: inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp:111: // Assertion `prior_height > 0' failed. if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif #endif if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) @@ -1287,7 +1353,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_single_conv) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_same) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1311,7 +1384,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_padding_valid) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_even) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1331,7 +1411,14 @@ TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_deconvolution) } TEST_P(Test_TensorFlow_layers, fp16_weights_fp16_max_pool_odd_valid) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // [ GENERAL_ERROR ] AssertionFailed: !expired() + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2020020000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -1364,13 +1451,11 @@ TEST_P(Test_TensorFlow_layers, lstm) { if(backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* not supported */ -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Xlink, Failed to allocate graph: NC_ERROR + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // Exception: Ngraph operation Reshape with name Reshape has dynamic output shape on 0 port, but CPU plug-in supports only static shape @@ -1382,6 +1467,11 @@ TEST_P(Test_TensorFlow_layers, lstm) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); @@ -1411,7 +1501,20 @@ TEST_P(Test_TensorFlow_layers, split_equals) TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif runTensorFlowNet("resize_nearest_neighbor"); +} +TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor_keras_upsampling2d) +{ runTensorFlowNet("keras_upsampling2d"); } @@ -1435,25 +1538,30 @@ TEST_P(Test_TensorFlow_layers, fused_resize_conv) runTensorFlowNet("fused_resize_conv"); } -TEST_P(Test_TensorFlow_layers, slice) +TEST_P(Test_TensorFlow_layers, slice_crop2d) { - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && - (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) - applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, - CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); double l1 = target == DNN_TARGET_MYRIAD ? 4.9e-3 : default_l1; runTensorFlowNet("crop2d", false, l1); +} +TEST_P(Test_TensorFlow_layers, slice_4d) +{ runTensorFlowNet("slice_4d"); +} +TEST_P(Test_TensorFlow_layers, slice_strided) +{ runTensorFlowNet("strided_slice"); } -TEST_P(Test_TensorFlow_layers, softmax) +TEST_P(Test_TensorFlow_layers, softmax_keras) { runTensorFlowNet("keras_softmax"); +} +TEST_P(Test_TensorFlow_layers, softmax_slim) +{ runTensorFlowNet("slim_softmax"); } -TEST_P(Test_TensorFlow_layers, slim_softmax_v2) +TEST_P(Test_TensorFlow_layers, softmax_slim_v2) { #if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD && @@ -1472,10 +1580,19 @@ TEST_P(Test_TensorFlow_layers, relu6) TEST_P(Test_TensorFlow_layers, subpixel) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#elif defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif runTensorFlowNet("subpixel"); } @@ -1566,13 +1683,16 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu) { if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported; only across channels is supported -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); -#endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Eltwise executor got invalid input/output dims configuration + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Input prelu:StatefulPartitionedCall/StatefulPartitionedCall/sequential/p_re_lu/add hasn't been found in primitiveIDs map + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { // IE exception: Input prelu:StatefulPartitionedCall/StatefulPartitionedCall/sequential/p_re_lu/add hasn't been found in primitiveIDs map @@ -1584,6 +1704,11 @@ TEST_P(Test_TensorFlow_layers, tf2_prelu) if (target == DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); } +#elif defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #endif runTensorFlowNet("tf2_prelu"); @@ -1594,6 +1719,13 @@ TEST_P(Test_TensorFlow_layers, tf2_permute_nhwc_ncwh) runTensorFlowNet("tf2_permute_nhwc_ncwh"); } +// issue #21852 +TEST_P(Test_TensorFlow_layers, tf_graph_simplifier_buffer_overflow) +{ + // This just shouldn't segfault, otherwise it's fine + EXPECT_ANY_THROW(readNetFromTensorflow(path("tf_graph_simplifier_buffer_overflow_net.pb"))); +} + TEST_P(Test_TensorFlow_layers, squeeze) { #if defined(INF_ENGINE_RELEASE) diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 1365df78ea..520887480d 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -287,6 +287,21 @@ TEST_P(Test_Torch_layers, net_normalize) { if(backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* only L1 and L2 norms are supported */ + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + // Cannot get memory! + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION + ); +#endif + runTorchNet("net_normalize", "", false, true); } @@ -551,6 +566,8 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) double normL1 = cvtest::norm(refBlob, out, cv::NORM_L1) / refBlob.total(); if (target == DNN_TARGET_MYRIAD) EXPECT_LE(normL1, 4.0f); + else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + EXPECT_LE(normL1, 1.0f); else EXPECT_LE(normL1, 0.6f); } diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index cd47421ea7..a0b56d5e61 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -138,7 +138,7 @@ set(gapi_srcs # OAK Backend (optional) src/backends/oak/goak.cpp src/backends/oak/goakbackend.cpp - src/backends/oak/goak_media_adapter.cpp + src/backends/oak/goak_memory_adapters.cpp # OCL Backend (currently built-in) src/backends/ocl/goclbackend.cpp @@ -186,6 +186,7 @@ set(gapi_srcs src/streaming/onevpl/cfg_params_parser.cpp src/streaming/onevpl/utils.cpp src/streaming/onevpl/data_provider_interface_exception.cpp + src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp src/streaming/onevpl/accelerators/surface/surface.cpp @@ -200,6 +201,10 @@ set(gapi_srcs src/streaming/onevpl/engine/decode/decode_session.cpp src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp src/streaming/onevpl/engine/transcode/transcode_session.cpp + src/streaming/onevpl/engine/preproc/preproc_engine.cpp + src/streaming/onevpl/engine/preproc/preproc_session.cpp + src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp + src/streaming/onevpl/engine/preproc_engine_interface.cpp src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp src/streaming/onevpl/data_provider_dispatcher.cpp @@ -360,7 +365,9 @@ endif() # perf test dependencies postprocessing if(HAVE_GAPI_ONEVPL) # NB: TARGET opencv_perf_gapi doesn't exist before `ocv_add_perf_tests` + # src/ is specified to include dirs for INTERNAL tests only. if(TARGET opencv_perf_gapi) + target_include_directories(opencv_perf_gapi PRIVATE "${CMAKE_CURRENT_LIST_DIR}/src") ocv_target_compile_definitions(opencv_perf_gapi PRIVATE -DHAVE_ONEVPL) ocv_target_link_libraries(opencv_perf_gapi PRIVATE ${VPL_IMPORTED_TARGETS}) if(HAVE_D3D11 AND HAVE_OPENCL) @@ -368,13 +375,3 @@ if(HAVE_GAPI_ONEVPL) endif() endif() endif() - -if(HAVE_OAK) - # FIXME: consider better solution - if(TARGET example_gapi_oak_rgb_camera_encoding) - ocv_target_compile_definitions(example_gapi_oak_rgb_camera_encoding PRIVATE -DHAVE_OAK) - endif() - if(TARGET example_gapi_oak_small_hetero_pipeline) - ocv_target_compile_definitions(example_gapi_oak_small_hetero_pipeline PRIVATE -DHAVE_OAK) - endif() -endif() diff --git a/modules/gapi/doc/slides/gapi_overview.org b/modules/gapi/doc/slides/gapi_overview.org index 676c914b66..7ed85baeca 100644 --- a/modules/gapi/doc/slides/gapi_overview.org +++ b/modules/gapi/doc/slides/gapi_overview.org @@ -592,7 +592,7 @@ subgraph cluster_3 {style=filled;color=azure2; C}; *** But how does it run? -- Since ~infer~ is an *Operation*, backends may provide *Kernels* implenting it; +- Since ~infer~ is an *Operation*, backends may provide *Kernels* implementing it; - The only publicly available inference backend now is *OpenVINO™*: - Brings its ~infer~ kernel atop of the Inference Engine; - NN model data is passed through G-API compile arguments (like kernels); diff --git a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp index 48909a84fc..ff3ee45ed3 100644 --- a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp +++ b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp @@ -2,12 +2,17 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #ifndef OPENCV_GAPI_GCPUKERNEL_HPP #define OPENCV_GAPI_GCPUKERNEL_HPP +#ifdef _MSC_VER +#pragma warning(disable: 4702) // "Unreachable code" +// on postprocess(...) call inside OCVCallHelper +#endif + #include #include #include diff --git a/modules/gapi/include/opencv2/gapi/garray.hpp b/modules/gapi/include/opencv2/gapi/garray.hpp index 17b03332e0..55f4d11b12 100644 --- a/modules/gapi/include/opencv2/gapi/garray.hpp +++ b/modules/gapi/include/opencv2/gapi/garray.hpp @@ -381,7 +381,7 @@ public: * * @note The value of `cv::GArray` may be overwritten by assigning some * other `cv::GArray` to the object using `operator=` -- on the - * assigment, the old association or value is discarded. + * assignment, the old association or value is discarded. * * @param v a std::vector to associate with this * `cv::GArray` object. Vector data is copied into the diff --git a/modules/gapi/include/opencv2/gapi/gframe.hpp b/modules/gapi/include/opencv2/gapi/gframe.hpp index af5fc6bee5..54fb30789e 100644 --- a/modules/gapi/include/opencv2/gapi/gframe.hpp +++ b/modules/gapi/include/opencv2/gapi/gframe.hpp @@ -48,7 +48,7 @@ struct GOrigin; * operations like BGR(), Y(), UV() -- these operations provide * access to frame's data in the familiar cv::GMat form, which can be * used with the majority of the existing G-API operations. These - * accessor functions may perform color space converion on the fly if + * accessor functions may perform color space conversion on the fly if * the image format of the GFrame they are applied to differs from the * operation's semantic (e.g. the BGR() accessor is called on an NV12 * image frame). diff --git a/modules/gapi/include/opencv2/gapi/gscalar.hpp b/modules/gapi/include/opencv2/gapi/gscalar.hpp index 7ebededcf0..d8a47c8ea8 100644 --- a/modules/gapi/include/opencv2/gapi/gscalar.hpp +++ b/modules/gapi/include/opencv2/gapi/gscalar.hpp @@ -63,7 +63,7 @@ public: * * @note The value of GScalar may be overwritten by assigning some * other GScalar to the object using `operator=` -- on the - * assigment, the old GScalar value is discarded. + * assignment, the old GScalar value is discarded. * * @param s a cv::Scalar value to associate with this GScalar object. */ diff --git a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp index 890eb584fb..5677768a96 100644 --- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp +++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp @@ -225,7 +225,7 @@ public: * setSource() to run the graph on a new video stream. * * @overload - * @param args arguments used to contruct and initialize a stream + * @param args arguments used to construct and initialize a stream * source. */ template diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index 2b43421907..b56175788f 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -204,7 +204,7 @@ namespace detail { static_assert(!(cv::detail::has_gshape>::value || cv::detail::contains::type, GAPI_OWN_TYPES_LIST>::value), - "gin/gout must not be used with G* classses or cv::gapi::own::*"); + "gin/gout must not be used with G* classes or cv::gapi::own::*"); return GTypeTraits::wrap_out(u); } }; diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp index de6f3bcdb9..44f0528153 100644 --- a/modules/gapi/include/opencv2/gapi/imgproc.hpp +++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp @@ -1241,7 +1241,7 @@ or column if there are N channels, or have N columns if there is a single channe @param src Input set of 2D points stored in one of possible containers: Mat, std::vector, std::vector, std::vector. @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER -and @ref DIST_C are not suppored. +and @ref DIST_C are not supported. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen. @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the @@ -1313,7 +1313,7 @@ or column if there are N channels, or have N columns if there is a single channe @param src Input set of 3D points stored in one of possible containers: Mat, std::vector, std::vector, std::vector. @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER -and @ref DIST_C are not suppored. +and @ref DIST_C are not supported. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value is chosen. @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index e6b7be58ad..204bd8f266 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -20,6 +20,7 @@ #include // GAPI_EXPORTS #include // GKernelPackage #include // Generic +#include // Preproc Dev & Ctx namespace cv { namespace gapi { @@ -84,6 +85,9 @@ struct ParamDesc { // have 2D (Layout::NC) input and if the first dimension not equal to 1 // net.setBatchSize(1) will overwrite it. cv::optional batch_size; + + cv::optional vpl_preproc_device; + cv::optional vpl_preproc_ctx; }; } // namespace detail @@ -126,6 +130,8 @@ public: , {} , 1u , {} + , {} + , {} , {}} { }; @@ -148,6 +154,8 @@ public: , {} , 1u , {} + , {} + , {} , {}} { }; @@ -336,6 +344,13 @@ public: return *this; } + Params& cfgPreprocessingParams(const cv::gapi::wip::onevpl::Device &device, + const cv::gapi::wip::onevpl::Context &ctx) { + desc.vpl_preproc_device = cv::util::make_optional(device); + desc.vpl_preproc_ctx = cv::util::make_optional(ctx); + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return cv::gapi::ie::backend(); } std::string tag() const { return Net::tag(); } @@ -370,7 +385,7 @@ public: const std::string &device) : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u, - {}, {}}, + {}, {}, {}, {}}, m_tag(tag) { }; @@ -388,7 +403,7 @@ public: const std::string &device) : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u, - {}, {}}, + {}, {}, {}, {}}, m_tag(tag) { }; diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp index bb5ef6c59e..16fc42eb63 100644 --- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -126,7 +126,7 @@ public: The function is used to associate data of graph outputs with output layers of network topology. If a network has only one output layer, there is no need to call it - as the layer is associated with ouput automatically but this doesn't prevent + as the layer is associated with output automatically but this doesn't prevent you from doing it yourself. Count of names has to match to number of network outputs or you can set your own output but for this case you have to additionally use @ref cfgPostProc function. diff --git a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp index c7308dd39f..e39d6fd4c6 100644 --- a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp @@ -95,7 +95,7 @@ GAPI_EXPORTS_W GArray parseSSD(const GMat& in, /** @brief Parses output of Yolo network. Extracts detection information (box, confidence, label) from Yolo output, -filters it by given confidence and performs non-maximum supression for overlapping boxes. +filters it by given confidence and performs non-maximum suppression for overlapping boxes. @note Function textual ID is "org.opencv.nn.parsers.parseYolo" @@ -105,7 +105,7 @@ where num_classes - a number of classes Yolo network was trained with. @param inSz Size to project detected boxes to (size of the input image). @param confidenceThreshold If confidence of the detection is smaller than confidence threshold, detection is rejected. -@param nmsThreshold Non-maximum supression threshold which controls minimum +@param nmsThreshold Non-maximum suppression threshold which controls minimum relative box intersection area required for rejecting the box with a smaller confidence. If 1.f, nms is not performed and no boxes are rejected. @param anchors Anchors Yolo network was trained with. diff --git a/modules/gapi/include/opencv2/gapi/oak/infer.hpp b/modules/gapi/include/opencv2/gapi/oak/infer.hpp new file mode 100644 index 0000000000..4a1b9f6db6 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/oak/infer.hpp @@ -0,0 +1,66 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef OPENCV_GAPI_OAK_INFER_HPP +#define OPENCV_GAPI_OAK_INFER_HPP + +#include +#include +#include +#include + +#include +#include + +#include // GAPI_EXPORTS +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace oak { + +namespace detail { +/** +* @brief This structure contains description of inference parameters +* which is specific to OAK models. +*/ +struct ParamDesc { + std::string blob_file; +}; +} // namespace detail + +/** + * Contains description of inference parameters and kit of functions that + * fill this parameters. + */ +template class Params { +public: + /** @brief Class constructor. + + Constructs Params based on model information and sets default values for other + inference description parameters. + + @param model Path to model (.blob file) + */ + explicit Params(const std::string &model) { + desc.blob_file = model; + }; + + // BEGIN(G-API's network parametrization API) + GBackend backend() const { return cv::gapi::oak::backend(); } + std::string tag() const { return Net::tag(); } + cv::util::any params() const { return { desc }; } + // END(G-API's network parametrization API) + +protected: + detail::ParamDesc desc; +}; + +} // namespace oak +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_OAK_INFER_HPP diff --git a/modules/gapi/include/opencv2/gapi/oak/oak.hpp b/modules/gapi/include/opencv2/gapi/oak/oak.hpp index 05fb09946f..8b56b8a365 100644 --- a/modules/gapi/include/opencv2/gapi/oak/oak.hpp +++ b/modules/gapi/include/opencv2/gapi/oak/oak.hpp @@ -27,7 +27,7 @@ struct EncoderConfig { */ enum class Profile: int { H264_BASELINE, H264_HIGH, H264_MAIN, H265_MAIN, MJPEG }; /** - * Specifies prefered bitrate (kb) of compressed output bitstream + * Specifies preferred bitrate (kb) of compressed output bitstream */ std::int32_t bitrate = 8000; /** @@ -89,28 +89,55 @@ G_API_OP(GSobelXY, , "org.opencv } }; +G_API_OP(GCopy, , "org.opencv.oak.copy") { + static GFrameDesc outMeta(const GFrameDesc& in) { + return in; + } +}; + +// FIXME: add documentation on operations below + GAPI_EXPORTS GArray encode(const GFrame& in, const EncoderConfig&); GAPI_EXPORTS GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk); +GAPI_EXPORTS GFrame copy(const GFrame& in); + // OAK backend & kernels //////////////////////////////////////////////////////// GAPI_EXPORTS cv::gapi::GBackend backend(); GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); // Camera object /////////////////////////////////////////////////////////////// -struct GAPI_EXPORTS ColorCameraParams {}; +struct GAPI_EXPORTS ColorCameraParams { + /** + * Format of the frame one gets from the camera + */ + bool interleaved = false; + + // FIXME: extend + enum class BoardSocket: int { RGB, BGR }; + + BoardSocket board_socket = BoardSocket::RGB; + + // FIXME: extend + enum class Resolution: int { THE_1080_P }; + + Resolution resolution = Resolution::THE_1080_P; +}; class GAPI_EXPORTS ColorCamera: public cv::gapi::wip::IStreamSource { cv::MediaFrame m_dummy; + ColorCameraParams m_params; virtual bool pull(cv::gapi::wip::Data &data) override; virtual GMetaArg descr_of() const override; public: ColorCamera(); + explicit ColorCamera(const ColorCameraParams& params); }; } // namespace oak diff --git a/modules/gapi/include/opencv2/gapi/own/assert.hpp b/modules/gapi/include/opencv2/gapi/own/assert.hpp index d50543fdac..4bd3eaaf50 100644 --- a/modules/gapi/include/opencv2/gapi/own/assert.hpp +++ b/modules/gapi/include/opencv2/gapi/own/assert.hpp @@ -43,7 +43,6 @@ namespace detail #define GAPI_Assert(expr) \ { if (!(expr)) ::detail::assert_abort(#expr, __LINE__, __FILE__, __func__); } - #ifdef NDEBUG # define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) #else diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index 6863a5ecab..adbcfdbdeb 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -241,7 +241,7 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Scalar &s); GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::Mat &m); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Mat &m); -// FIXME: for GRunArgs serailization +// FIXME: for GRunArgs serialization #if !defined(GAPI_STANDALONE) GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat & um); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat & um); diff --git a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp index 1ed6e24b49..9e927872a3 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp @@ -67,7 +67,7 @@ G desync(const G &g) { * always produce their full output vectors. * * This operation only makes sense when a GComputation is compiled in - * straming mode with cv::GComputation::compileStreaming(). If this + * streaming mode with cv::GComputation::compileStreaming(). If this * operation is used and there are desynchronized outputs, the user * should use a special version of cv::GStreamingCompiled::pull() * which produces an array of cv::util::optional<> objects. diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp new file mode 100644 index 0000000000..421b592aae --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/accel_types.hpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP +#define GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP + +#include +#include + +#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +enum class AccelType: uint8_t { + HOST, + DX11, + + LAST_VALUE = std::numeric_limits::max() +}; + +GAPI_EXPORTS const char* to_cstring(AccelType type); + +struct IDeviceSelector; +struct GAPI_EXPORTS Device { + friend struct IDeviceSelector; + using Ptr = void*; + + ~Device(); + const std::string& get_name() const; + Ptr get_ptr() const; + AccelType get_type() const; +private: + Device(Ptr device_ptr, const std::string& device_name, + AccelType device_type); + + std::string name; + Ptr ptr; + AccelType type; +}; + +struct GAPI_EXPORTS Context { + friend struct IDeviceSelector; + using Ptr = void*; + + ~Context(); + Ptr get_ptr() const; + AccelType get_type() const; +private: + Context(Ptr ctx_ptr, AccelType ctx_type); + Ptr ptr; + AccelType type; +}; + +GAPI_EXPORTS Device create_host_device(); +GAPI_EXPORTS Context create_host_context(); + +GAPI_EXPORTS Device create_dx11_device(Device::Ptr device_ptr, + const std::string& device_name); +GAPI_EXPORTS Context create_dx11_context(Context::Ptr ctx_ptr); + +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ACCEL_TYPES_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp index d93b4c561d..25573afd4c 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp @@ -22,14 +22,14 @@ namespace onevpl { /** * @brief Public class is using for creation of onevpl::GSource instances. * - * Class members availaible through methods @ref CfgParam::get_name() and @ref CfgParam::get_value() are used by + * Class members available through methods @ref CfgParam::get_name() and @ref CfgParam::get_value() are used by * onevpl::GSource inner logic to create or find oneVPL particular implementation * (software/hardware, specific API version and etc.). * * @note Because oneVPL may provide several implementations which are satisfying with multiple (or single one) @ref CfgParam * criteria therefore it is possible to configure `preferred` parameters. This kind of CfgParams are created * using `is_major = false` argument in @ref CfgParam::create method and are not used by creating oneVPL particular implementations. - * Instead they fill out a "score table" to select preferrable implementation from available list. Implementation are satisfying + * Instead they fill out a "score table" to select preferable implementation from available list. Implementation are satisfying * with most of these optional params would be chosen. * If no one optional CfgParam params were present then first of available oneVPL implementation would be applied. * Please get on https://spec.oneapi.io/versions/latest/elements/oneVPL/source/API_ref/VPL_disp_api_func.html?highlight=mfxcreateconfig#mfxsetconfigfilterproperty diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp index c70e3db0ac..ec683a7527 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/data_provider_interface.hpp @@ -27,7 +27,7 @@ private: }; struct GAPI_EXPORTS DataProviderSystemErrorException final : public DataProviderException { - DataProviderSystemErrorException(int error_code, const std::string& desription = std::string()); + DataProviderSystemErrorException(int error_code, const std::string& description = std::string()); ~DataProviderSystemErrorException() = default; }; diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp index 04f8cae02a..2e2d879fba 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/device_selector_interface.hpp @@ -12,53 +12,12 @@ #include #include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include namespace cv { namespace gapi { namespace wip { namespace onevpl { - -enum class AccelType: uint8_t { - HOST, - DX11, - - LAST_VALUE = std::numeric_limits::max() -}; - -GAPI_EXPORTS const char* to_cstring(AccelType type); - -struct IDeviceSelector; -struct GAPI_EXPORTS Device { - friend struct IDeviceSelector; - using Ptr = void*; - - ~Device(); - const std::string& get_name() const; - Ptr get_ptr() const; - AccelType get_type() const; -private: - Device(Ptr device_ptr, const std::string& device_name, - AccelType device_type); - - std::string name; - Ptr ptr; - AccelType type; -}; - -struct GAPI_EXPORTS Context { - friend struct IDeviceSelector; - using Ptr = void*; - - ~Context(); - Ptr get_ptr() const; - AccelType get_type() const; -private: - Context(Ptr ctx_ptr, AccelType ctx_type); - Ptr ptr; - AccelType type; -}; - struct GAPI_EXPORTS IDeviceSelector { using Ptr = std::shared_ptr; diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp index 6334480c1b..04dc2e246d 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/source.hpp @@ -46,6 +46,10 @@ public: void* accel_device_ptr, void* accel_ctx_ptr); + GSource(const std::string& filePath, + const CfgParams& cfg_params, + const Device &device, const Context &ctx); + GSource(const std::string& filePath, const CfgParams& cfg_params, std::shared_ptr selector); diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index 6323582f5b..87ad9e2086 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -218,7 +218,7 @@ def op(op_id, in_types, out_types): for i, t in enumerate(out_types): if t not in [cv.GMat, cv.GScalar, *garray_types, *gopaque_types]: - raise Exception('{} unsupported output type: {} in possition: {}' + raise Exception('{} unsupported output type: {} in position: {}' .format(cls.__name__, t.__name__, i)) def on(*args): @@ -287,15 +287,4 @@ def kernel(op_cls): return kernel_with_params -# FIXME: On the c++ side every class is placed in cv2 module. -cv.gapi.wip.draw.Rect = cv.gapi_wip_draw_Rect -cv.gapi.wip.draw.Text = cv.gapi_wip_draw_Text -cv.gapi.wip.draw.Circle = cv.gapi_wip_draw_Circle -cv.gapi.wip.draw.Line = cv.gapi_wip_draw_Line -cv.gapi.wip.draw.Mosaic = cv.gapi_wip_draw_Mosaic -cv.gapi.wip.draw.Image = cv.gapi_wip_draw_Image -cv.gapi.wip.draw.Poly = cv.gapi_wip_draw_Poly - -cv.gapi.streaming.queue_capacity = cv.gapi_streaming_queue_capacity - cv.gapi.wip.GStreamerPipeline = cv.gapi_wip_gst_GStreamerPipeline diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index b4be0048d0..7b760920e7 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -701,7 +701,7 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, PyErr_Clear(); throw std::logic_error("Python kernel failed with error!"); } - // NB: In fact it's impossible situation, becase errors were handled above. + // NB: In fact it's impossible situation, because errors were handled above. GAPI_Assert(result.get() && "Python kernel returned NULL!"); if (out_info.size() == 1) @@ -811,7 +811,7 @@ static GMetaArgs run_py_meta(cv::detail::PyObjectHolder out_meta, PyErr_Clear(); throw std::logic_error("Python outMeta failed with error!"); } - // NB: In fact it's impossible situation, becase errors were handled above. + // NB: In fact it's impossible situation, because errors were handled above. GAPI_Assert(result.get() && "Python outMeta returned NULL!"); out_metas = PyTuple_Check(result.get()) ? get_meta_args(result.get()) diff --git a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py index 34a07b535b..7763579ebf 100644 --- a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py +++ b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py @@ -187,7 +187,7 @@ try: blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) # NB: The operation output is cv::GArray, so it should be mapped - # to python paramaters like this: [(1.2, 3.4), (5.2, 3.2)], because the cv::Point2f + # to python parameters like this: [(1.2, 3.4), (5.2, 3.2)], because the cv::Point2f # according to opencv rules mapped to the tuple and cv::GArray<> mapped to the list. # OpenCV returns np.array with shape (n_features, 1, 2), so let's to convert it to list # tuples with size == n_features. @@ -203,7 +203,7 @@ try: @staticmethod def outMeta(desc): - raise NotImplementedError("outMeta isn't imlemented") + raise NotImplementedError("outMeta isn't implemented") return Op @@ -605,7 +605,7 @@ try: img1 = np.array([1, 2, 3]) # FIXME: Cause Bad variant access. - # Need to provide more descriptive error messsage. + # Need to provide more descriptive error message. with self.assertRaises(Exception): comp.apply(cv.gin(img0, img1), args=cv.gapi.compile_args( cv.gapi.kernels(GAddImpl))) diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index d06447d791..e4c5926204 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -454,7 +454,7 @@ try: else: raise unittest.SkipTest(str(e)) except SystemError as e: - raise unittest.SkipTest(str(e) + ", casued by " + str(e.__cause__)) + raise unittest.SkipTest(str(e) + ", caused by " + str(e.__cause__)) def test_gst_multiple_sources(self): diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index 72837da199..83ef13008c 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -436,7 +436,7 @@ PERF_TEST_P_(DivPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need to workaround issue in the OpenCV. + //This condition need to workaround the #21044 issue in the OpenCV. //It reinitializes divider matrix without zero values for CV_16S DST type. if (dtype == CV_16S && dtype != type) cv::randu(in_mat2, cv::Scalar::all(1), cv::Scalar::all(255)); @@ -482,7 +482,7 @@ PERF_TEST_P_(DivCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); - //This condition need as workaround the issue in the OpenCV. + //This condition need to workaround the #21044 issue in the OpenCV. //It reinitializes divider scalar without zero values for CV_16S DST type. if (dtype == CV_16S || (type == CV_16S && dtype == -1)) cv::randu(sc, cv::Scalar::all(1), cv::Scalar::all(SHRT_MAX)); @@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance) // FIXIT Unstable input data for divide initMatsRandU(type, sz, dtype, false); + //This condition need to workaround the #21044 issue in the OpenCV. + //It reinitializes divider matrix without zero values for CV_16S DST type. + if (dtype == CV_16S || (type == CV_16S && dtype == -1)) + cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255)); // OpenCV code /////////////////////////////////////////////////////////// cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype); diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index 796d05101e..83de793a81 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -28,8 +28,8 @@ INSTANTIATE_TEST_CASE_P(SqrtPerfTestFluid, SqrtPerfTest, INSTANTIATE_TEST_CASE_P(AddPerfTestFluid, AddPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(AddCPerfTestFluid, AddCPerfTest, @@ -40,10 +40,10 @@ INSTANTIATE_TEST_CASE_P(AddCPerfTestFluid, AddCPerfTest, Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(SubPerfTestFluid, SubPerfTest, - Combine(Values(AbsExact().to_compare_f()), + Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 0).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_16SC1, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(SubCPerfTestFluid, SubCPerfTest, @@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest, INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(CV_8UC1, CV_8UC3, CV_32FC1), - Values(-1, CV_8U, CV_32F), + Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), + Values(-1, CV_8U, CV_16U, CV_16S, CV_32F), Values(1.0), Values(cv::compile_args(CORE_FLUID)))); @@ -324,7 +324,7 @@ INSTANTIATE_TEST_CASE_P(ConvertToPerfTestFluid, ConvertToPerfTest, Values(CV_8UC3, CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1), Values(CV_8U, CV_16U, CV_16S, CV_32F), Values(szSmall128, szVGA, sz720p, sz1080p), - Values(2.5, 1.0), + Values(1.0, 2.5), Values(0.0), Values(cv::compile_args(CORE_FLUID)))); diff --git a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp index b5e72ae4ce..a768875f32 100644 --- a/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp @@ -201,7 +201,7 @@ INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest, INSTANTIATE_TEST_CASE_P(ResizePerfTestFluid, ResizePerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(cv::INTER_LINEAR), Values(szSmall128, szVGA, sz720p, sz1080p), Values(cv::Size(64, 64), @@ -217,7 +217,7 @@ INSTANTIATE_TEST_CASE_P(BottleneckKernelsPerfTestFluid, BottleneckKernelsConstIn INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(szSmall128, szVGA, sz720p, sz1080p), Values(0.5), Values(0.5), @@ -225,7 +225,7 @@ INSTANTIATE_TEST_CASE_P(ResizeInSimpleGraphPerfTestFluid, ResizeInSimpleGraphPer INSTANTIATE_TEST_CASE_P(ResizeFxFyPerfTestFluid, ResizeFxFyPerfTest, Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()), - Values(CV_8UC3), + Values(CV_8UC3, CV_32FC1), Values(cv::INTER_LINEAR), Values(szSmall128, szVGA, sz720p, sz1080p), Values(0.5, 0.25, 2), diff --git a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp index 7d06ad068b..eacdef6eba 100644 --- a/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp +++ b/modules/gapi/perf/streaming/gapi_streaming_source_perf_tests.cpp @@ -11,6 +11,13 @@ #include #include +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" + namespace opencv_test { using namespace perf; @@ -32,10 +39,10 @@ using codec_t = std::string; using accel_mode_t = std::string; using source_description_t = std::tuple; -class OneVPLSourcePerfTest : public TestPerfParams {}; -class VideoCapSourcePerfTest : public TestPerfParams {}; +class OneVPLSourcePerf_Test : public TestPerfParams {}; +class VideoCapSourcePerf_Test : public TestPerfParams {}; -PERF_TEST_P_(OneVPLSourcePerfTest, TestPerformance) +PERF_TEST_P_(OneVPLSourcePerf_Test, TestPerformance) { using namespace cv::gapi::wip::onevpl; @@ -67,7 +74,7 @@ PERF_TEST_P_(OneVPLSourcePerfTest, TestPerformance) SANITY_CHECK_NOTHING(); } -PERF_TEST_P_(VideoCapSourcePerfTest, TestPerformance) +PERF_TEST_P_(VideoCapSourcePerf_Test, TestPerformance) { using namespace cv::gapi::wip; @@ -82,7 +89,7 @@ PERF_TEST_P_(VideoCapSourcePerfTest, TestPerformance) SANITY_CHECK_NOTHING(); } -INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerfTest, +INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerf_Test, Values(source_description_t(files[0], codec[0], ""), source_description_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11"), source_description_t(files[1], codec[1], ""), @@ -90,10 +97,204 @@ INSTANTIATE_TEST_CASE_P(Streaming, OneVPLSourcePerfTest, source_description_t(files[2], codec[2], ""), source_description_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11"))); -INSTANTIATE_TEST_CASE_P(Streaming, VideoCapSourcePerfTest, +INSTANTIATE_TEST_CASE_P(Streaming, VideoCapSourcePerf_Test, Values(files[0], files[1], files[2])); + +using pp_out_param_t = cv::GFrameDesc; +using source_description_preproc_t = decltype(std::tuple_cat(std::declval(), + std::declval>())); +class OneVPLSourcePerf_PP_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Test, TestPerformance) +{ + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + pp_out_param_t res = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + cfg_params.push_back(CfgParam::create_vpp_out_width(static_cast(res.size.width))); + cfg_params.push_back(CfgParam::create_vpp_out_height(static_cast(res.size.height))); + cfg_params.push_back(CfgParam::create_vpp_out_crop_x(0)); + cfg_params.push_back(CfgParam::create_vpp_out_crop_y(0)); + cfg_params.push_back(CfgParam::create_vpp_out_crop_w(static_cast(res.size.width))); + cfg_params.push_back(CfgParam::create_vpp_out_crop_h(static_cast(res.size.height))); + + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params); + + cv::gapi::wip::Data out; + TEST_CYCLE() + { + source_ptr->pull(out); + } + + SANITY_CHECK_NOTHING(); +} +static pp_out_param_t full_hd = pp_out_param_t {cv::MediaFormat::NV12, + {1920, 1080}}; + +static pp_out_param_t cif = pp_out_param_t {cv::MediaFormat::NV12, + {352, 288}}; + +INSTANTIATE_TEST_CASE_P(Streaming_Source_PP, OneVPLSourcePerf_PP_Test, + Values(source_description_preproc_t(files[0], codec[0], "", full_hd), + source_description_preproc_t(files[0], codec[0], "", cif), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", cif), + source_description_preproc_t(files[1], codec[1], "", full_hd), + source_description_preproc_t(files[1], codec[1], "", cif), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",full_hd), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",cif), + source_description_preproc_t(files[2], codec[2], "", full_hd), + source_description_preproc_t(files[2], codec[2], "", cif), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", cif))); + +class OneVPLSourcePerf_PP_Engine_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Test, TestPerformance) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + const pp_out_param_t &required_frame_param = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + auto device_selector = std::make_shared(cfg_params); + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params, device_selector); + + // create VPP preproc engine + std::unique_ptr policy; + if (mode == "MFX_ACCEL_MODE_VIA_D3D11") { + policy.reset(new VPLDX11AccelerationPolicy(device_selector)); + } else if (mode.empty()){ + policy.reset(new VPLCPUAccelerationPolicy(device_selector)); + } else { + ASSERT_TRUE(false && "Unsupported acceleration policy type"); + } + VPPPreprocEngine preproc_engine(std::move(policy)); + cv::gapi::wip::Data out; + cv::util::optional empty_roi; + TEST_CYCLE() + { + source_ptr->pull(out); + cv::MediaFrame frame = cv::util::get(out); + cv::util::optional param = preproc_engine.is_applicable(frame); + pp_session sess = preproc_engine.initialize_preproc(param.value(), + required_frame_param); + (void)preproc_engine.run_sync(sess, frame, empty_roi); + } + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(Streaming_Engine_PP, OneVPLSourcePerf_PP_Engine_Test, + Values(source_description_preproc_t(files[0], codec[0], "", full_hd), + source_description_preproc_t(files[0], codec[0], "", cif), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", cif), + source_description_preproc_t(files[1], codec[1], "", full_hd), + source_description_preproc_t(files[1], codec[1], "", cif), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",full_hd), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11",cif), + source_description_preproc_t(files[2], codec[2], "", full_hd), + source_description_preproc_t(files[2], codec[2], "", cif), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", full_hd), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", cif))); + +class OneVPLSourcePerf_PP_Engine_Bypass_Test : public TestPerfParams {}; + +PERF_TEST_P_(OneVPLSourcePerf_PP_Engine_Bypass_Test, TestPerformance) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + + const auto params = GetParam(); + source_t src = findDataFile(get<0>(params)); + codec_t type = get<1>(params); + accel_mode_t mode = get<2>(params); + const pp_out_param_t &required_frame_param = get<3>(params); + + std::vector cfg_params { + CfgParam::create_implementation("MFX_IMPL_TYPE_HARDWARE"), + }; + + if (!type.empty()) { + cfg_params.push_back(CfgParam::create_decoder_id(type.c_str())); + } + + if (!mode.empty()) { + cfg_params.push_back(CfgParam::create_acceleration_mode(mode.c_str())); + } + + auto device_selector = std::make_shared(cfg_params); + auto source_ptr = cv::gapi::wip::make_onevpl_src(src, cfg_params, device_selector); + + // create VPP preproc engine + std::unique_ptr policy; + if (mode == "MFX_ACCEL_MODE_VIA_D3D11") { + policy.reset(new VPLDX11AccelerationPolicy(device_selector)); + } else { + policy.reset(new VPLCPUAccelerationPolicy(device_selector)); + } + VPPPreprocEngine preproc_engine(std::move(policy)); + cv::gapi::wip::Data out; + cv::util::optional empty_roi; + TEST_CYCLE() + { + source_ptr->pull(out); + cv::MediaFrame frame = cv::util::get(out); + cv::util::optional param = preproc_engine.is_applicable(frame); + pp_session sess = preproc_engine.initialize_preproc(param.value(), + required_frame_param); + (void)preproc_engine.run_sync(sess, frame, empty_roi); + } + + SANITY_CHECK_NOTHING(); +} + +static pp_out_param_t res_672x384 = pp_out_param_t {cv::MediaFormat::NV12, + {672, 384}}; +static pp_out_param_t res_336x256 = pp_out_param_t {cv::MediaFormat::NV12, + {336, 256}}; +INSTANTIATE_TEST_CASE_P(Streaming_Engine_PP_Bypass, OneVPLSourcePerf_PP_Engine_Bypass_Test, + Values(source_description_preproc_t(files[0], codec[0], "", res_672x384), + source_description_preproc_t(files[0], codec[0], "MFX_ACCEL_MODE_VIA_D3D11", res_672x384), + source_description_preproc_t(files[1], codec[1], "", res_672x384), + source_description_preproc_t(files[1], codec[1], "MFX_ACCEL_MODE_VIA_D3D11", res_672x384), + source_description_preproc_t(files[2], codec[2], "", res_336x256), + source_description_preproc_t(files[2], codec[2], "MFX_ACCEL_MODE_VIA_D3D11", res_336x256))); } // namespace opencv_test #endif // HAVE_ONEVPL diff --git a/modules/gapi/samples/face_detection_mtcnn.cpp b/modules/gapi/samples/face_detection_mtcnn.cpp index 50cb666a90..583219b51d 100644 --- a/modules/gapi/samples/face_detection_mtcnn.cpp +++ b/modules/gapi/samples/face_detection_mtcnn.cpp @@ -33,7 +33,7 @@ const std::string keys = "{ thrr | 0.7 | MTCNN R confidence threshold}" "{ thro | 0.7 | MTCNN O confidence threshold}" "{ half_scale | false | MTCNN P use half scale pyramid}" -"{ queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0}" +"{ queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0}" ; namespace { @@ -488,8 +488,8 @@ static inline std::string get_pnet_level_name(const cv::Size &in_size) { } int calculate_scales(const cv::Size &input_size, std::vector &out_scales, std::vector &out_sizes ) { - //calculate multi - scale and limit the maxinum side to 1000 - //pr_scale: limit the maxinum side to 1000, < 1.0 + //calculate multi - scale and limit the maximum side to 1000 + //pr_scale: limit the maximum side to 1000, < 1.0 double pr_scale = 1.0; double h = static_cast(input_size.height); double w = static_cast(input_size.width); @@ -602,7 +602,7 @@ int main(int argc, char* argv[]) { cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); total_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false); - //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] + //The rest PNet pyramid layers to accumulate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) { std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[i])); diff --git a/modules/gapi/samples/infer_single_roi.cpp b/modules/gapi/samples/infer_single_roi.cpp index e9c26a9b63..15af442a9d 100644 --- a/modules/gapi/samples/infer_single_roi.cpp +++ b/modules/gapi/samples/infer_single_roi.cpp @@ -150,7 +150,7 @@ int main(int argc, char *argv[]) auto networks = cv::gapi::networks(face_net); // Now build the graph. The graph structure may vary - // pased on the input parameters + // passed on the input parameters cv::GStreamingCompiled pipeline; auto inputs = cv::gin(cv::gapi::wip::make_src(input)); diff --git a/modules/gapi/samples/oak_basic_infer.cpp b/modules/gapi/samples/oak_basic_infer.cpp new file mode 100644 index 0000000000..ee234e3806 --- /dev/null +++ b/modules/gapi/samples/oak_basic_infer.cpp @@ -0,0 +1,122 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +const std::string keys = + "{ h help | | Print this help message }" + "{ detector | | Path to compiled .blob face detector model }" + "{ duration | 100 | Number of frames to pull from camera and run inference on }"; + +namespace custom { + +G_API_NET(FaceDetector, , "sample.custom.face-detector"); + +using GDetections = cv::GArray; +using GSize = cv::GOpaque; +using GPrims = cv::GArray; + +G_API_OP(BBoxes, , "sample.custom.b-boxes") { + static cv::GArrayDesc outMeta(const cv::GArrayDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { + // This kernel converts the rectangles into G-API's + // rendering primitives + static void run(const std::vector &in_face_rcs, + std::vector &out_prims) { + out_prims.clear(); + const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { + return cv::gapi::wip::draw::Rect(rc, clr, 2); + }; + for (auto &&rc : in_face_rcs) { + out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green + } + } +}; + +} // namespace custom + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const auto det_name = cmd.get("detector"); + const auto duration = cmd.get("duration"); + + if (det_name.empty()) { + std::cerr << "FATAL: path to detection model is not provided for the sample." + << "Please specify it with --detector options." + << std::endl; + return 1; + } + + // Prepare G-API kernels and networks packages: + auto detector = cv::gapi::oak::Params(det_name); + auto networks = cv::gapi::networks(detector); + + auto kernels = cv::gapi::combine( + cv::gapi::kernels(), + cv::gapi::oak::kernels()); + + auto args = cv::compile_args(kernels, networks); + + // Initialize graph structure + cv::GFrame in; + cv::GFrame copy = cv::gapi::oak::copy(in); // NV12 transfered to host + passthrough copy for infer + cv::GOpaque sz = cv::gapi::streaming::size(copy); + + // infer is not affected by the actual copy here + cv::GMat blob = cv::gapi::infer(copy); + // FIXME: OAK infer detects faces slightly out of frame bounds + cv::GArray rcs = cv::gapi::parseSSD(blob, sz, 0.5f, true, false); + auto rendered = cv::gapi::wip::draw::renderFrame(copy, custom::BBoxes::on(rcs)); + // on-the-fly conversion NV12->BGR + cv::GMat out = cv::gapi::streaming::BGR(rendered); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out, rcs)) + .compileStreaming(std::move(args)); + + // Graph execution + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + cv::Mat out_mat; + std::vector out_dets; + int frames = 0; + while (pipeline.pull(cv::gout(out_mat, out_dets))) { + std::string name = "oak_infer_frame_" + std::to_string(frames) + ".png"; + + cv::imwrite(name, out_mat); + + if (!out_dets.empty()) { + std::cout << "Got " << out_dets.size() << " detections on frame #" << frames << std::endl; + } + + ++frames; + if (frames == duration) { + pipeline.stop(); + break; + } + } + std::cout << "Pipeline finished. Processed " << frames << " frames" << std::endl; + return 0; +} diff --git a/modules/gapi/samples/oak_copy.cpp b/modules/gapi/samples/oak_copy.cpp new file mode 100644 index 0000000000..3c4d455f52 --- /dev/null +++ b/modules/gapi/samples/oak_copy.cpp @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include +#include // BGR accessor + +#include // CommandLineParser + +const std::string keys = + "{ h help | | Print this help message }" + "{ output | output.png | Path to the output file }"; + +int main(int argc, char *argv[]) { + cv::CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + const std::string output_name = cmd.get("output"); + + cv::GFrame in; + // Actually transfers data to host + cv::GFrame copy = cv::gapi::oak::copy(in); + // Default camera works only with nv12 format + cv::GMat out = cv::gapi::streaming::Y(copy); + + auto args = cv::compile_args(cv::gapi::oak::ColorCameraParams{}, + cv::gapi::oak::kernels()); + + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)).compileStreaming(std::move(args)); + + // Graph execution ///////////////////////////////////////////////////////// + cv::Mat out_mat(1920, 1080, CV_8UC1); + + pipeline.setSource(cv::gapi::wip::make_src()); + pipeline.start(); + + // pull 1 frame + pipeline.pull(cv::gout(out_mat)); + + cv::imwrite(output_name, out_mat); + + std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl; +} diff --git a/modules/gapi/samples/oak_rgb_camera_encoding.cpp b/modules/gapi/samples/oak_rgb_camera_encoding.cpp index ac6b5cc5f0..ee07ef09e4 100644 --- a/modules/gapi/samples/oak_rgb_camera_encoding.cpp +++ b/modules/gapi/samples/oak_rgb_camera_encoding.cpp @@ -13,8 +13,6 @@ const std::string keys = "{ h help | | Print this help message }" "{ output | output.h265 | Path to the output .h265 video file }"; -#ifdef HAVE_OAK - int main(int argc, char *argv[]) { cv::CommandLineParser cmd(argc, argv, keys); if (cmd.has("help")) { @@ -60,11 +58,3 @@ int main(int argc, char *argv[]) { std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl; } -#else // HAVE_OAK - -int main() { - GAPI_Assert(false && "Built without OAK support"); - return -1; -} - -#endif // HAVE_OAK diff --git a/modules/gapi/samples/oak_small_hetero_pipeline.cpp b/modules/gapi/samples/oak_small_hetero_pipeline.cpp index dadb9d0f3b..5df3ba5da1 100644 --- a/modules/gapi/samples/oak_small_hetero_pipeline.cpp +++ b/modules/gapi/samples/oak_small_hetero_pipeline.cpp @@ -13,8 +13,6 @@ const std::string keys = "{ h help | | Print this help message }" "{ output | output.png | Path to the output file }"; -#ifdef HAVE_OAK - int main(int argc, char *argv[]) { cv::CommandLineParser cmd(argc, argv, keys); if (cmd.has("help")) { @@ -58,12 +56,3 @@ int main(int argc, char *argv[]) { std::cout << "Pipeline finished: " << output_name << " file has been written." << std::endl; } - -#else // HAVE_OAK - -int main() { - GAPI_Assert(false && "Built without OAK support"); - return -1; -} - -#endif // HAVE_OAK diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp index 80327e2d59..a89cd5071b 100644 --- a/modules/gapi/samples/onevpl_infer_single_roi.cpp +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include // CommandLineParser #include @@ -44,29 +43,47 @@ const std::string keys = "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" "{ faced | AUTO | Target device for face detection model (e.g. AUTO, GPU, VPU, ...) }" "{ cfg_params | :;: | Semicolon separated list of oneVPL mfxVariants which is used for configuring source (see `MFXSetConfigFilterProperty` by https://spec.oneapi.io/versions/latest/elements/oneVPL/source/index.html) }" - "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automaticaly if 0 }" + "{ streaming_queue_capacity | 1 | Streaming executor queue capacity. Calculated automatically if 0 }" "{ frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size}" "{ vpp_frames_pool_size | 0 | OneVPL source applies this parameter as preallocated frames pool size for VPP preprocessing results}" - "{ source_preproc_enable | 0 | Turn on OneVPL source frame preprocessing using network input description instead of IE plugin preprocessing}"; + "{ roi | -1,-1,-1,-1 | Region of interest (ROI) to use for inference. Identified automatically when not set }"; namespace { -bool is_gpu(const std::string &device_name) { - return device_name.find("GPU") != std::string::npos; -} - std::string get_weights_path(const std::string &model_path) { const auto EXT_LEN = 4u; const auto sz = model_path.size(); - CV_Assert(sz > EXT_LEN); + GAPI_Assert(sz > EXT_LEN); auto ext = model_path.substr(sz - EXT_LEN); std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ return static_cast(std::tolower(c)); }); - CV_Assert(ext == ".xml"); + GAPI_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } +// TODO: It duplicates infer_single_roi sample +cv::util::optional parse_roi(const std::string &rc) { + cv::Rect rv; + char delim[3]; + + std::stringstream is(rc); + is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height; + if (is.bad()) { + return cv::util::optional(); // empty value + } + const auto is_delim = [](char c) { + return c == ','; + }; + if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) { + return cv::util::optional(); // empty value + } + if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) { + return cv::util::optional(); // empty value + } + return cv::util::make_optional(std::move(rv)); +} + #ifdef HAVE_INF_ENGINE #ifdef HAVE_DIRECTX #ifdef HAVE_D3D11 @@ -128,9 +145,15 @@ using GRect = cv::GOpaque; using GSize = cv::GOpaque; using GPrims = cv::GArray; -G_API_OP(LocateROI, )>, "sample.custom.locate-roi") { - static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &, - std::reference_wrapper) { +G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +// TODO: It duplicates infer_single_roi sample +G_API_OP(LocateROI, , "sample.custom.locate-roi") { + static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) { return cv::empty_gopaque_desc(); } }; @@ -152,29 +175,18 @@ GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { // the most convenient aspect ratio for detectors to use) static void run(const cv::Size& in_size, - std::reference_wrapper device_id_ref, cv::Rect &out_rect) { // Identify the central point & square size (- some padding) - // NB: GPU plugin in InferenceEngine doesn't support ROI at now - if (!is_gpu(device_id_ref.get())) { - const auto center = cv::Point{in_size.width/2, in_size.height/2}; - auto sqside = std::min(in_size.width, in_size.height); + const auto center = cv::Point{in_size.width/2, in_size.height/2}; + auto sqside = std::min(in_size.width, in_size.height); - // Now build the central square ROI - out_rect = cv::Rect{ center.x - sqside/2 - , center.y - sqside/2 - , sqside - , sqside - }; - } else { - // use whole frame for GPU device - out_rect = cv::Rect{ 0 - , 0 - , in_size.width - , in_size.height - }; - } + // Now build the central square ROI + out_rect = cv::Rect{ center.x - sqside/2 + , center.y - sqside/2 + , sqside + , sqside + }; } }; @@ -195,6 +207,55 @@ GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { } }; +GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { + static void run(const cv::Mat &in_ssd_result, + const cv::Rect &in_roi, + const cv::Size &in_parent_size, + std::vector &out_objects) { + const auto &in_ssd_dims = in_ssd_result.size; + GAPI_Assert(in_ssd_dims.dims() == 4u); + + const int MAX_PROPOSALS = in_ssd_dims[2]; + const int OBJECT_SIZE = in_ssd_dims[3]; + GAPI_Assert(OBJECT_SIZE == 7); // fixed SSD object size + + const cv::Size up_roi = in_roi.size(); + const cv::Rect surface({0,0}, in_parent_size); + + out_objects.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; + const float label = data[i * OBJECT_SIZE + 1]; + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + (void) label; // unused + + if (image_id < 0.f) { + break; // marks end-of-detections + } + if (confidence < 0.5f) { + continue; // skip objects with low confidence + } + + // map relative coordinates to the original image scale + // taking the ROI into account + cv::Rect rc; + rc.x = static_cast(rc_left * up_roi.width); + rc.y = static_cast(rc_top * up_roi.height); + rc.width = static_cast(rc_right * up_roi.width) - rc.x; + rc.height = static_cast(rc_bottom * up_roi.height) - rc.y; + rc.x += in_roi.x; + rc.y += in_roi.y; + out_objects.emplace_back(rc & surface); + } + } +}; + } // namespace custom namespace cfg { @@ -213,14 +274,14 @@ int main(int argc, char *argv[]) { // get file name const auto file_path = cmd.get("input"); const auto output = cmd.get("output"); + const auto opt_roi = parse_roi(cmd.get("roi")); const auto face_model_path = cmd.get("facem"); const auto streaming_queue_capacity = cmd.get("streaming_queue_capacity"); const auto source_decode_queue_capacity = cmd.get("frames_pool_size"); const auto source_vpp_queue_capacity = cmd.get("vpp_frames_pool_size"); - const auto vpl_source_preproc_enable = cmd.get("source_preproc_enable"); const auto device_id = cmd.get("faced"); - // check ouput file extension + // check output file extension if (!output.empty()) { auto ext = output.find_last_of("."); if (ext == std::string::npos || (output.substr(ext + 1) != "avi")) { @@ -235,12 +296,6 @@ int main(int argc, char *argv[]) { try { std::string line; while (std::getline(params_list, line, ';')) { - if (vpl_source_preproc_enable == 0) { - if (line.find("vpp.") != std::string::npos) { - // skip VPP preprocessing primitives if not requested - continue; - } - } source_cfgs.push_back(cfg::create_from_string(line)); } } catch (const std::exception& ex) { @@ -267,8 +322,8 @@ int main(int argc, char *argv[]) { // GAPI InferenceEngine backend to provide interoperability with onevpl::GSource // So GAPI InferenceEngine backend and onevpl::GSource MUST share the same // device and context - void* accel_device_ptr = nullptr; - void* accel_ctx_ptr = nullptr; + cv::util::optional accel_device; + cv::util::optional accel_ctx; #ifdef HAVE_INF_ENGINE #ifdef HAVE_DIRECTX @@ -276,7 +331,7 @@ int main(int argc, char *argv[]) { auto dx11_dev = createCOMPtrGuard(); auto dx11_ctx = createCOMPtrGuard(); - if (is_gpu(device_id)) { + if (device_id.find("GPU") != std::string::npos) { auto adapter_factory = createCOMPtrGuard(); { IDXGIFactory* out_factory = nullptr; @@ -310,8 +365,13 @@ int main(int argc, char *argv[]) { } std::tie(dx11_dev, dx11_ctx) = create_device_with_ctx(intel_adapter.get()); - accel_device_ptr = reinterpret_cast(dx11_dev.get()); - accel_ctx_ptr = reinterpret_cast(dx11_ctx.get()); + accel_device = cv::util::make_optional( + cv::gapi::wip::onevpl::create_dx11_device( + reinterpret_cast(dx11_dev.get()), + device_id)); + accel_ctx = cv::util::make_optional( + cv::gapi::wip::onevpl::create_dx11_context( + reinterpret_cast(dx11_ctx.get()))); // put accel type description for VPL source source_cfgs.push_back(cfg::create_from_string( @@ -323,30 +383,27 @@ int main(int argc, char *argv[]) { #endif // HAVE_D3D11 #endif // HAVE_DIRECTX // set ctx_config for GPU device only - no need in case of CPU device type - if (is_gpu(device_id)) { + if (accel_device.has_value() && + accel_device.value().get_name().find("GPU") != std::string::npos) { InferenceEngine::ParamMap ctx_config({{"CONTEXT_TYPE", "VA_SHARED"}, - {"VA_DEVICE", accel_device_ptr} }); - + {"VA_DEVICE", accel_device.value().get_ptr()} }); face_net.cfgContextParams(ctx_config); - face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); - std::cout <<"/*******************************************************/\n" - "ATTENTION: GPU Inference Engine preprocessing is not vital as expected!" - " Please consider param \"source_preproc_enable=1\" and specify " - " appropriated media frame transformation using oneVPL::VPP primitives" - " which force onevpl::GSource to produce tranformed media frames." - " For exploring list of supported transformations please find out " - " vpp_* related stuff in" - " gapi/include/opencv2/gapi/streaming/onevpl/cfg_params.hpp" - " Pay attention that to obtain expected result In this case VPP " - " transformation must match network input params." - " Please vote/create issue about exporting network params using GAPI\n" - "/******************************************************/" << std::endl; + // NB: consider NV12 surface because it's one of native GPU image format + face_net.pluginConfig({{"GPU_NV12_TWO_INPUTS", "YES" }}); } #endif // HAVE_INF_ENGINE + // turn on preproc + if (accel_device.has_value() && accel_ctx.has_value()) { + face_net.cfgPreprocessingParams(accel_device.value(), + accel_ctx.value()); + std::cout << "enforce VPP preprocessing on " << device_id << std::endl; + } + auto kernels = cv::gapi::kernels < custom::OCVLocateROI + , custom::OCVParseSSD , custom::OCVBBoxes>(); auto networks = cv::gapi::networks(face_net); auto face_detection_args = cv::compile_args(networks, kernels); @@ -355,17 +412,16 @@ int main(int argc, char *argv[]) { } // Create source - cv::Ptr cap; + cv::gapi::wip::IStreamSource::Ptr cap; try { - if (is_gpu(device_id)) { + if (accel_device.has_value() && accel_ctx.has_value()) { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs, - device_id, - accel_device_ptr, - accel_ctx_ptr); + accel_device.value(), + accel_ctx.value()); } else { cap = cv::gapi::wip::make_onevpl_src(file_path, source_cfgs); } - std::cout << "oneVPL source desription: " << cap->descr_of() << std::endl; + std::cout << "oneVPL source description: " << cap->descr_of() << std::endl; } catch (const std::exception& ex) { std::cerr << "Cannot create source: " << ex.what() << std::endl; return -1; @@ -373,29 +429,35 @@ int main(int argc, char *argv[]) { cv::GMetaArg descr = cap->descr_of(); auto frame_descr = cv::util::get(descr); + cv::GOpaque in_roi; + auto inputs = cv::gin(cap); // Now build the graph cv::GFrame in; auto size = cv::gapi::streaming::size(in); - auto roi = custom::LocateROI::on(size, std::cref(device_id)); - auto blob = cv::gapi::infer(roi, in); - cv::GArray rcs = cv::gapi::parseSSD(blob, size, 0.5f, true, true); - auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi)); - auto out = cv::gapi::streaming::BGR(out_frame); - - cv::GStreamingCompiled pipeline; - try { - pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) - .compileStreaming(std::move(face_detection_args)); - } catch (const std::exception& ex) { - std::cerr << "Exception occured during pipeline construction: " << ex.what() << std::endl; - return -1; + auto graph_inputs = cv::GIn(in); + if (!opt_roi.has_value()) { + // Automatically detect ROI to infer. Make it output parameter + std::cout << "ROI is not set or invalid. Locating it automatically" + << std::endl; + in_roi = custom::LocateROI::on(size); + } else { + // Use the value provided by user + std::cout << "Will run inference for static region " + << opt_roi.value() + << " only" + << std::endl; + graph_inputs += cv::GIn(in_roi); + inputs += cv::gin(opt_roi.value()); } + auto blob = cv::gapi::infer(in_roi, in); + cv::GArray rcs = custom::ParseSSD::on(blob, in_roi, size); + auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, in_roi)); + auto out = cv::gapi::streaming::BGR(out_frame); + cv::GStreamingCompiled pipeline = cv::GComputation(std::move(graph_inputs), cv::GOut(out)) // and move here + .compileStreaming(std::move(face_detection_args)); // The execution part - - // TODO USE may set pool size from outside and set queue_capacity size, - // compile arg: cv::gapi::streaming::queue_capacity - pipeline.setSource(std::move(cap)); + pipeline.setSource(std::move(inputs)); pipeline.start(); size_t frames = 0u; @@ -404,7 +466,7 @@ int main(int argc, char *argv[]) { if (!output.empty() && !writer.isOpened()) { const auto sz = cv::Size{frame_descr.size.width, frame_descr.size.height}; writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); - CV_Assert(writer.isOpened()); + GAPI_Assert(writer.isOpened()); } cv::Mat outMat; @@ -419,6 +481,7 @@ int main(int argc, char *argv[]) { } tm.stop(); std::cout << "Processed " << frames << " frames" << " (" << frames / tm.getTimeSec() << " FPS)" << std::endl; + return 0; } diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp index ca6187e1ca..7a0f94655c 100644 --- a/modules/gapi/samples/pipeline_modeling_tool.cpp +++ b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -10,7 +10,9 @@ #include #if defined(_WIN32) +#define NOMINMAX #include +#undef NOMINMAX #endif #include "pipeline_modeling_tool/dummy_source.hpp" @@ -173,6 +175,50 @@ static PLMode strToPLMode(const std::string& mode_str) { } } +template <> +CallParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + // FIXME: Impossible to read size_t due OpenCV limitations. + auto call_every_nth_opt = readOpt(fn["call_every_nth"]); + auto call_every_nth = call_every_nth_opt.value_or(1); + if (call_every_nth <= 0) { + throw std::logic_error( + name + " call_every_nth must be greater than zero\n" + "Current call_every_nth: " + std::to_string(call_every_nth)); + } + return CallParams{std::move(name), static_cast(call_every_nth)}; +} + +template <> +InferParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + + InferParams params; + params.path = read(fn); + params.device = check_and_read(fn, "device", name); + params.input_layers = readList(fn, "input_layers", name); + params.output_layers = readList(fn, "output_layers", name); + + return params; +} + +template <> +DummyParams read(const cv::FileNode& fn) { + auto name = + check_and_read(fn, "name", "node"); + + DummyParams params; + params.time = check_and_read(fn, "time", name); + if (params.time < 0) { + throw std::logic_error(name + " time must be positive"); + } + params.output = check_and_read(fn, "output", name); + + return params; +} + static std::vector parseExecList(const std::string& exec_list) { std::vector pl_types; std::stringstream ss(exec_list); @@ -224,6 +270,7 @@ int main(int argc, char* argv[]) { " if set to 0. If it's specified will be" " applied for every pipeline. }" "{ app_mode | realtime | Application mode (realtime/benchmark). }" + "{ drop_frames | false | Drop frames if they come earlier than pipeline is completed. }" "{ exec_list | | A comma-separated list of pipelines that" " will be executed. Spaces around commas" " are prohibited. }"; @@ -238,10 +285,11 @@ int main(int argc, char* argv[]) { const auto load_config = cmd.get("load_config"); const auto cached_dir = cmd.get("cache_dir"); const auto log_file = cmd.get("log_file"); - const auto pl_mode = strToPLMode(cmd.get("pl_mode")); + const auto cmd_pl_mode = strToPLMode(cmd.get("pl_mode")); const auto qc = cmd.get("qc"); const auto app_mode = strToAppMode(cmd.get("app_mode")); const auto exec_str = cmd.get("exec_list"); + const auto drop_frames = cmd.get("drop_frames"); cv::FileStorage fs; if (cfg.empty()) { @@ -306,38 +354,25 @@ int main(int argc, char* argv[]) { if (app_mode == AppMode::BENCHMARK) { latency = 0.0; } - builder.setSource(src_name, latency, output); + auto src = std::make_shared(latency, output, drop_frames); + builder.setSource(src_name, src); } const auto& nodes_fn = check_and_get_fn(pl_fn, "nodes", name); if (!nodes_fn.isSeq()) { throw std::logic_error("nodes in " + name + " must be a sequence"); } + for (auto node_fn : nodes_fn) { - auto node_name = - check_and_read(node_fn, "name", "node"); + auto call_params = read(node_fn); auto node_type = check_and_read(node_fn, "type", "node"); if (node_type == "Dummy") { - auto time = - check_and_read(node_fn, "time", node_name); - if (time < 0) { - throw std::logic_error(node_name + " time must be positive"); - } - auto output = - check_and_read(node_fn, "output", node_name); - builder.addDummy(node_name, time, output); + builder.addDummy(call_params, read(node_fn)); } else if (node_type == "Infer") { - InferParams params; - params.path = read(node_fn); - params.device = - check_and_read(node_fn, "device", node_name); - params.input_layers = - readList(node_fn, "input_layers", node_name); - params.output_layers = - readList(node_fn, "output_layers", node_name); - params.config = config; - builder.addInfer(node_name, params); + auto infer_params = read(node_fn); + infer_params.config = config; + builder.addInfer(call_params, infer_params); } else { throw std::logic_error("Unsupported node type: " + node_type); } @@ -352,9 +387,18 @@ int main(int argc, char* argv[]) { builder.addEdge(edge); } + auto cfg_pl_mode = readOpt(pl_fn["mode"]); // NB: Pipeline mode from config takes priority over cmd. - auto mode = readOpt(pl_fn["mode"]); - builder.setMode(mode.has_value() ? strToPLMode(mode.value()) : pl_mode); + auto pl_mode = cfg_pl_mode.has_value() + ? strToPLMode(cfg_pl_mode.value()) : cmd_pl_mode; + // NB: Using drop_frames with streaming pipelines will follow to + // incorrect performance results. + if (drop_frames && pl_mode == PLMode::STREAMING) { + throw std::logic_error( + "--drop_frames option is supported only for pipelines in \"regular\" mode"); + } + + builder.setMode(pl_mode); // NB: Queue capacity from config takes priority over cmd. auto config_qc = readOpt(pl_fn["queue_capacity"]); diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp index 1514eb2671..d77e120081 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp @@ -14,21 +14,23 @@ class DummySource final: public cv::gapi::wip::IStreamSource { public: using Ptr = std::shared_ptr; DummySource(const double latency, - const OutputDescr& output); + const OutputDescr& output, + const bool drop_frames); bool pull(cv::gapi::wip::Data& data) override; cv::GMetaArg descr_of() const override; private: double m_latency; cv::Mat m_mat; - using TimePoint = - std::chrono::time_point; - cv::optional m_prev_pull_tp; + bool m_drop_frames; + double m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; }; DummySource::DummySource(const double latency, - const OutputDescr& output) - : m_latency(latency) { + const OutputDescr& output, + const bool drop_frames) + : m_latency(latency), m_drop_frames(drop_frames) { utils::createNDMat(m_mat, output.dims, output.precision); utils::generateRandom(m_mat); } @@ -36,23 +38,60 @@ DummySource::DummySource(const double latency, bool DummySource::pull(cv::gapi::wip::Data& data) { using namespace std::chrono; using namespace cv::gapi::streaming; - // NB: In case it's the first pull. - if (!m_prev_pull_tp) { - m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + + // NB: Wait m_latency before return the first frame. + if (m_next_tick_ts == -1) { + m_next_tick_ts = utils::timestamp() + m_latency; } + + int64_t curr_ts = utils::timestamp(); + if (curr_ts < m_next_tick_ts) { + /* + * curr_ts + * | + * ------|----*-----|-------> + * ^ + * m_next_tick_ts + * + * + * NB: New frame will be produced at the m_next_tick_ts point. + */ + utils::sleep(m_next_tick_ts - curr_ts); + } else { + /* + * curr_ts + * +1 +2 | + * |----------|----------|----------|----*-----|-------> + * ^ ^ + * m_next_tick_ts -------------> + * + * + * NB: Shift m_next_tick_ts to the nearest tick before curr_ts and + * update current seq_id correspondingly. + * + * if drop_frames is enabled, wait for the next tick, otherwise + * return last written frame (+2 at the picture above) immediately. + */ + int64_t num_frames = + static_cast((curr_ts - m_next_tick_ts) / m_latency); + m_curr_seq_id += num_frames; + m_next_tick_ts += num_frames * m_latency; + if (m_drop_frames) { + m_next_tick_ts += m_latency; + ++m_curr_seq_id; + utils::sleep(m_next_tick_ts - curr_ts); + } + } + // NB: Just increase reference counter not to release mat memory // after assigning it to the data. cv::Mat mat = m_mat; - auto end = high_resolution_clock::now(); - auto elapsed = - duration_cast>(end - *m_prev_pull_tp).count(); - auto delta = m_latency - elapsed; - if (delta > 0) { - utils::sleep(delta); - } - data.meta[meta_tag::timestamp] = int64_t{utils::timestamp()}; + + data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::seq_id] = m_curr_seq_id++; data = mat; - m_prev_pull_tp = cv::util::make_optional(high_resolution_clock::now()); + m_next_tick_ts += m_latency; + return true; } diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp index 63ada28603..a3f187249d 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -23,11 +23,16 @@ struct Edge { P dst; }; +struct CallParams { + std::string name; + size_t call_every_nth; +}; + struct CallNode { using F = std::function; - std::string name; - F run; + CallParams params; + F run; }; struct DataNode { @@ -44,6 +49,80 @@ struct Node { Kind kind; }; +struct SubGraphCall { + G_API_OP(GSubGraph, + , + "custom.subgraph") { + static cv::GMatDesc outMeta(const cv::GMatDesc& in, + cv::GComputation comp, + cv::GCompileArgs compile_args, + const size_t call_every_nth) { + GAPI_Assert(call_every_nth > 0); + auto out_metas = + comp.compile(in, std::move(compile_args)).outMetas(); + GAPI_Assert(out_metas.size() == 1u); + GAPI_Assert(cv::util::holds_alternative(out_metas[0])); + return cv::util::get(out_metas[0]); + } + + }; + + struct SubGraphState { + cv::Mat last_result; + cv::GCompiled cc; + int call_counter = 0; + }; + + GAPI_OCV_KERNEL_ST(SubGraphImpl, GSubGraph, SubGraphState) { + static void setup(const cv::GMatDesc& in, + cv::GComputation comp, + cv::GCompileArgs compile_args, + const size_t /*call_every_nth*/, + std::shared_ptr& state, + const cv::GCompileArgs& /*args*/) { + state.reset(new SubGraphState{}); + state->cc = comp.compile(in, std::move(compile_args)); + auto out_desc = + cv::util::get(state->cc.outMetas()[0]); + utils::createNDMat(state->last_result, + out_desc.dims, + out_desc.depth); + } + + static void run(const cv::Mat& in, + cv::GComputation /*comp*/, + cv::GCompileArgs /*compile_args*/, + const size_t call_every_nth, + cv::Mat& out, + SubGraphState& state) { + // NB: Make a call on the first iteration and skip the furthers. + if (state.call_counter == 0) { + state.cc(in, state.last_result); + } + state.last_result.copyTo(out); + state.call_counter = (state.call_counter + 1) % call_every_nth; + } + }; + + void operator()(const cv::GProtoArgs& inputs, cv::GProtoArgs& outputs); + + size_t numInputs() const { return 1; } + size_t numOutputs() const { return 1; } + + cv::GComputation comp; + cv::GCompileArgs compile_args; + size_t call_every_nth; +}; + +void SubGraphCall::operator()(const cv::GProtoArgs& inputs, + cv::GProtoArgs& outputs) { + GAPI_Assert(inputs.size() == 1u); + GAPI_Assert(cv::util::holds_alternative(inputs[0])); + GAPI_Assert(outputs.empty()); + auto in = cv::util::get(inputs[0]); + outputs.emplace_back(GSubGraph::on(in, comp, compile_args, call_every_nth)); +} + struct DummyCall { G_API_OP(GDummy, , @@ -54,7 +133,8 @@ struct DummyCall { if (output.dims.size() == 2) { return cv::GMatDesc(output.precision, 1, - cv::Size(output.dims[0], output.dims[1])); + // NB: Dims[H, W] -> Size(W, H) + cv::Size(output.dims[1], output.dims[0])); } return cv::GMatDesc(output.precision, output.dims); } @@ -166,6 +246,11 @@ struct ImportPath { using ModelPath = cv::util::variant; +struct DummyParams { + double time; + OutputDescr output; +}; + struct InferParams { std::string name; ModelPath path; @@ -178,15 +263,14 @@ struct InferParams { class PipelineBuilder { public: PipelineBuilder(); - void addDummy(const std::string& name, - const double time, - const OutputDescr& output); + void addDummy(const CallParams& call_params, + const DummyParams& dummy_params); - void addInfer(const std::string& name, const InferParams& params); + void addInfer(const CallParams& call_params, + const InferParams& infer_params); - void setSource(const std::string& name, - double latency, - const OutputDescr& output); + void setSource(const std::string& name, + std::shared_ptr src); void addEdge(const Edge& edge); void setMode(PLMode mode); @@ -198,8 +282,8 @@ public: private: template - void addCall(const std::string& name, - CallT&& call); + void addCall(const CallParams& call_params, + CallT&& call); Pipeline::Ptr construct(); @@ -227,20 +311,21 @@ private: PipelineBuilder::PipelineBuilder() : m_state(new State{}) { }; -void PipelineBuilder::addDummy(const std::string& name, - const double time, - const OutputDescr& output) { +void PipelineBuilder::addDummy(const CallParams& call_params, + const DummyParams& dummy_params) { m_state->kernels.include(); - addCall(name, DummyCall{time, output}); + addCall(call_params, + DummyCall{dummy_params.time, dummy_params.output}); } template -void PipelineBuilder::addCall(const std::string& name, - CallT&& call) { +void PipelineBuilder::addCall(const CallParams& call_params, + CallT&& call) { size_t num_inputs = call.numInputs(); size_t num_outputs = call.numOutputs(); - Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{name, std::move(call)}}}); + Node::Ptr call_node(new Node{{},{},Node::Kind{CallNode{call_params, + std::move(call)}}}); // NB: Create placeholders for inputs. call_node->in_nodes.resize(num_inputs); // NB: Create outputs with empty data. @@ -250,36 +335,39 @@ void PipelineBuilder::addCall(const std::string& name, Node::Kind{DataNode{}}}); } - auto it = m_state->calls_map.find(name); + auto it = m_state->calls_map.find(call_params.name); if (it != m_state->calls_map.end()) { - throw std::logic_error("Node: " + name + " already exists!"); + throw std::logic_error("Node: " + call_params.name + " already exists!"); } - m_state->calls_map.emplace(name, call_node); + m_state->calls_map.emplace(call_params.name, call_node); m_state->all_calls.emplace_back(call_node); } -void PipelineBuilder::addInfer(const std::string& name, - const InferParams& params) { +void PipelineBuilder::addInfer(const CallParams& call_params, + const InferParams& infer_params) { // NB: No default ctor for Params. std::unique_ptr> pp; - if (cv::util::holds_alternative(params.path)) { - auto load_path = cv::util::get(params.path); - pp.reset(new cv::gapi::ie::Params(name, + if (cv::util::holds_alternative(infer_params.path)) { + auto load_path = cv::util::get(infer_params.path); + pp.reset(new cv::gapi::ie::Params(call_params.name, load_path.xml, load_path.bin, - params.device)); + infer_params.device)); } else { - GAPI_Assert(cv::util::holds_alternative(params.path)); - auto import_path = cv::util::get(params.path); - pp.reset(new cv::gapi::ie::Params(name, + GAPI_Assert(cv::util::holds_alternative(infer_params.path)); + auto import_path = cv::util::get(infer_params.path); + pp.reset(new cv::gapi::ie::Params(call_params.name, import_path.blob, - params.device)); + infer_params.device)); } - pp->pluginConfig(params.config); + pp->pluginConfig(infer_params.config); m_state->networks += cv::gapi::networks(*pp); - addCall(name, InferCall{name, params.input_layers, params.output_layers}); + addCall(call_params, + InferCall{call_params.name, + infer_params.input_layers, + infer_params.output_layers}); } void PipelineBuilder::addEdge(const Edge& edge) { @@ -315,12 +403,11 @@ void PipelineBuilder::addEdge(const Edge& edge) { out_data->out_nodes.push_back(dst_node); } -void PipelineBuilder::setSource(const std::string& name, - double latency, - const OutputDescr& output) { - GAPI_Assert(!m_state->src); - m_state->src = std::make_shared(latency, output); - addCall(name, SourceCall{}); +void PipelineBuilder::setSource(const std::string& name, + std::shared_ptr src) { + GAPI_Assert(!m_state->src && "Only single source pipelines are supported!"); + m_state->src = src; + addCall(CallParams{name, 1u/*call_every_nth*/}, SourceCall{}); } void PipelineBuilder::setMode(PLMode mode) { @@ -407,7 +494,7 @@ Pipeline::Ptr PipelineBuilder::construct() { if (in_data_node.expired()) { const auto& call = cv::util::get(call_node->kind); throw std::logic_error( - "Node: " + call.name + " in Pipeline: " + m_state->name + + "Node: " + call.params.name + " in Pipeline: " + m_state->name + " has dangling input by in port: " + std::to_string(i)); } } @@ -426,8 +513,14 @@ Pipeline::Ptr PipelineBuilder::construct() { sorted_calls.push_back(n); } } + + m_state->kernels.include(); + m_state->compile_args.emplace_back(m_state->networks); + m_state->compile_args.emplace_back(m_state->kernels); + // (2). Go through every call node. for (auto call_node : sorted_calls) { + auto& call = cv::util::get(call_node->kind); cv::GProtoArgs outputs; cv::GProtoArgs inputs; for (size_t i = 0; i < call_node->in_nodes.size(); ++i) { @@ -439,8 +532,37 @@ Pipeline::Ptr PipelineBuilder::construct() { // (3). Extract proto input from every input node. inputs.push_back(in_data.arg.value()); } + // NB: If node shouldn't be called on each iterations, + // it should be wrapped into subgraph which is able to skip calling. + if (call.params.call_every_nth != 1u) { + // FIXME: Limitation of the subgraph operation (). + // G-API doesn't support dynamic number of inputs/outputs. + if (inputs.size() > 1u) { + throw std::logic_error( + "skip_frame_nth is supported only for single input subgraphs\n" + "Current subgraph has " + std::to_string(inputs.size()) + " inputs"); + } + + if (outputs.size() > 1u) { + throw std::logic_error( + "skip_frame_nth is supported only for single output subgraphs\n" + "Current subgraph has " + std::to_string(inputs.size()) + " outputs"); + } + // FIXME: Should be generalized. + // Now every subgraph contains only single node + // which has single input/output. + GAPI_Assert(cv::util::holds_alternative(inputs[0])); + cv::GProtoArgs subgr_inputs{cv::GProtoArg{cv::GMat()}}; + cv::GProtoArgs subgr_outputs; + call.run(subgr_inputs, subgr_outputs); + auto comp = cv::GComputation(cv::GProtoInputArgs{subgr_inputs}, + cv::GProtoOutputArgs{subgr_outputs}); + call = CallNode{CallParams{call.params.name, 1u/*call_every_nth*/}, + SubGraphCall{std::move(comp), + m_state->compile_args, + call.params.call_every_nth}}; + } // (4). Run call and get outputs. - auto call = cv::util::get(call_node->kind); call.run(inputs, outputs); // (5) If call node doesn't have inputs // it means that it's input producer node (Source). @@ -462,9 +584,6 @@ Pipeline::Ptr PipelineBuilder::construct() { } } - m_state->compile_args.emplace_back(m_state->networks); - m_state->compile_args.emplace_back(m_state->kernels); - if (m_state->mode == PLMode::STREAMING) { GAPI_Assert(graph_inputs.size() == 1); GAPI_Assert(cv::util::holds_alternative(graph_inputs[0])); diff --git a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py index ef4bce6476..d56a0399e9 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py +++ b/modules/gapi/samples/pipeline_modeling_tool/test_pipeline_modeling_tool.py @@ -907,25 +907,78 @@ def test_error_invalid_pl_mode(): cfg_file = """\"%YAML:1.0 work_time: 1000 Pipelines: -PL1: - source: - name: 'Src' - latency: 20 - output: - dims: [1,2,3,4] - precision: 'U8' - nodes: - - name: 'Node0' - type: 'Dummy' - time: 0.2 + PL1: + source: + name: 'Src' + latency: 20 output: dims: [1,2,3,4] precision: 'U8' - edges: - - from: 'Src' - to: 'Node0'\" """ + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ exec_str = '{} --cfg={} --app_mode=unknown'.format(pipeline_modeling_tool, cfg_file) out = get_output(exec_str) assert out.startswith('Unsupported AppMode: unknown\n' 'Please chose between: realtime and benchmark') + + +def test_error_drop_frames_with_streaming(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + time: 0.2 + output: + dims: [1,2,3,4] + precision: 'U8' + edges: + - from: 'Src' + to: 'Node0'\" """ + + exec_str = '{} --cfg={} --pl_mode=streaming --drop_frames'.format(pipeline_modeling_tool, cfg_file) + out = get_output(exec_str) + assert out.startswith('--drop_frames option is supported only for pipelines in "regular" mode') + + +def test_incorrect_call_every_nth(): + cfg_file = """\"%YAML:1.0 +work_time: 1000 +Pipelines: + PL1: + source: + name: 'Src' + latency: 20 + output: + dims: [1,2,3,4] + precision: 'U8' + nodes: + - name: 'Node0' + type: 'Dummy' + call_every_nth: {}\" """ + + error = 'Node0 call_every_nth must be greater than zero\nCurrent call_every_nth: {}' + + def check(cfg_file, call_every_nth): + out = get_output('{} --cfg={}'.format(pipeline_modeling_tool, cfg_file.format(call_every_nth))) + assert out.startswith(error.format(call_every_nth)) + + check(cfg_file, -3) + check(cfg_file, 0) diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index f063dd0eb9..e3b1e7123d 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -411,6 +411,12 @@ void createMat(const cv::GMatDesc &desc, cv::Mat& mat) { GAPI_Assert(!desc.planar); mat.create(desc.dims, desc.depth); +#if !defined(GAPI_STANDALONE) + // NB: WA for 1D mats. + if (desc.dims.size() == 1u) { + mat.dims = 1; + } +#endif } } diff --git a/modules/gapi/src/api/kernels_streaming.cpp b/modules/gapi/src/api/kernels_streaming.cpp index 2c50551f4e..55834ffb9a 100644 --- a/modules/gapi/src/api/kernels_streaming.cpp +++ b/modules/gapi/src/api/kernels_streaming.cpp @@ -39,7 +39,7 @@ cv::GMat cv::gapi::streaming::desync(const cv::GMat &g) { // // At the same time, generally, every island in the streaming // graph gets its individual input as a queue (so normally, a - // writer pushes the same output MULTIPLE TIMES if it has mutliple + // writer pushes the same output MULTIPLE TIMES if it has multiple // readers): // // LWV diff --git a/modules/gapi/src/backends/common/gbackend.hpp b/modules/gapi/src/backends/common/gbackend.hpp index 99b8f5dd37..b05d8e2c36 100644 --- a/modules/gapi/src/backends/common/gbackend.hpp +++ b/modules/gapi/src/backends/common/gbackend.hpp @@ -173,7 +173,7 @@ namespace magazine // without utilizing magazine at all void GAPI_EXPORTS bindInArg (Mag& mag, const RcDesc &rc, const GRunArg &arg, HandleRMat handleRMat = HandleRMat::BIND); - // Extracts a memory object reference fro GRunArgP, stores it in appropriate slot in a magazine + // Extracts a memory object reference from GRunArgP, stores it in appropriate slot in a magazine // Note on RMat handling from bindInArg above is also applied here void GAPI_EXPORTS bindOutArg(Mag& mag, const RcDesc &rc, const GRunArgP &arg, HandleRMat handleRMat = HandleRMat::BIND); diff --git a/modules/gapi/src/backends/common/serialization.hpp b/modules/gapi/src/backends/common/serialization.hpp index 529fdc635d..3ba2e83581 100644 --- a/modules/gapi/src/backends/common/serialization.hpp +++ b/modules/gapi/src/backends/common/serialization.hpp @@ -164,7 +164,7 @@ GAPI_EXPORTS void serialize( IOStream& os GAPI_EXPORTS GSerialized deserialize(IIStream& is); GAPI_EXPORTS void reconstruct(const GSerialized &s, ade::Graph &g); -// FIXME: Basic Stream implementaions ////////////////////////////////////////// +// FIXME: Basic Stream implementations ///////////////////////////////////////// // Basic in-memory stream implementations. class GAPI_EXPORTS ByteMemoryOutStream final: public IOStream { diff --git a/modules/gapi/src/backends/cpu/gcpubackend.cpp b/modules/gapi/src/backends/cpu/gcpubackend.cpp index dfcaf3d478..f50f8ecd28 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.cpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "precomp.hpp" @@ -27,6 +27,7 @@ #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK! #include "utils/itt.hpp" +#include "logger.hpp" // FIXME: Is there a way to take a typed graph (our GModel), // and create a new typed graph _ATOP_ of that (by extending with a couple of @@ -88,7 +89,7 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g, { case NodeType::OP: { - m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); + m_opNodes.push_back(nh); // If kernel is stateful then prepare storage for its state. GCPUKernel k = gcm.metadata(nh).get().k; @@ -107,21 +108,12 @@ cv::gimpl::GCPUExecutable::GCPUExecutable(const ade::Graph &g, auto rc = RcDesc{desc.rc, desc.shape, desc.ctor}; magazine::bindInArg(m_res, rc, m_gm.metadata(nh).get().arg); } - //preallocate internal Mats in advance - if (desc.storage == Data::Storage::INTERNAL && desc.shape == GShape::GMAT) - { - const auto mat_desc = util::get(desc.meta); - auto& mat = m_res.slot()[desc.rc]; - createMat(mat_desc, mat); - } break; } default: util::throw_error(std::logic_error("Unsupported NodeType type")); } } - - // For each stateful kernel call 'setup' user callback to initialize state. - setupKernelStates(); + makeReshape(); } // FIXME: Document what it does @@ -176,9 +168,44 @@ void cv::gimpl::GCPUExecutable::setupKernelStates() } } +void cv::gimpl::GCPUExecutable::makeReshape() { + // Prepare the execution script + m_script.clear(); + for (auto &nh : m_opNodes) { + m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); + } + + // Preallocate internal mats + for (auto& nh : m_dataNodes) { + const auto& desc = m_gm.metadata(nh).get(); + if (desc.storage == Data::Storage::INTERNAL && desc.shape == GShape::GMAT) { + const auto mat_desc = util::get(desc.meta); + auto& mat = m_res.slot()[desc.rc]; + createMat(mat_desc, mat); + } + } +} + +void cv::gimpl::GCPUExecutable::reshape(ade::Graph&, const GCompileArgs& args) { + m_compileArgs = args; + makeReshape(); + // TODO: Add an input meta sensitivity flag to stateful kernels. + // When reshape() happens, reset state for meta-sensitive kernels only + if (!m_nodesToStates.empty()) { + std::call_once(m_warnFlag, + [](){ + GAPI_LOG_WARNING(NULL, + "\nGCPUExecutable::reshape was called. Resetting states of stateful kernels."); + }); + setupKernelStates(); + } +} + void cv::gimpl::GCPUExecutable::handleNewStream() { - m_newStreamStarted = true; + // In case if new video-stream happens - for each stateful kernel + // call 'setup' user callback to re-initialize state. + setupKernelStates(); } void cv::gimpl::GCPUExecutable::run(std::vector &&input_objs, @@ -208,14 +235,6 @@ void cv::gimpl::GCPUExecutable::run(std::vector &&input_objs, } } - // In case if new video-stream happens - for each stateful kernel - // call 'setup' user callback to re-initialize state. - if (m_newStreamStarted) - { - setupKernelStates(); - m_newStreamStarted = false; - } - // OpenCV backend execution is not a rocket science at all. // Simply invoke our kernels in the proper order. GConstGCPUModel gcm(m_g); diff --git a/modules/gapi/src/backends/cpu/gcpubackend.hpp b/modules/gapi/src/backends/cpu/gcpubackend.hpp index 6328da03b0..c8bad6c84f 100644 --- a/modules/gapi/src/backends/cpu/gcpubackend.hpp +++ b/modules/gapi/src/backends/cpu/gcpubackend.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #ifndef OPENCV_GAPI_GCPUBACKEND_HPP @@ -33,7 +33,7 @@ class GCPUExecutable final: public GIslandExecutable { const ade::Graph &m_g; GModel::ConstGraph m_gm; - const cv::GCompileArgs m_compileArgs; + cv::GCompileArgs m_compileArgs; struct OperationInfo { @@ -51,29 +51,26 @@ class GCPUExecutable final: public GIslandExecutable // List of all resources in graph (both internal and external) std::vector m_dataNodes; + std::vector m_opNodes; // Actual data of all resources in graph (both internal and external) Mag m_res; - // Flag which identifies if new stream was started - bool m_newStreamStarted = false; + // A flag for call_once() (used for log warnings) + std::once_flag m_warnFlag; GArg packArg(const GArg &arg); void setupKernelStates(); + void makeReshape(); + public: GCPUExecutable(const ade::Graph &graph, const cv::GCompileArgs &compileArgs, const std::vector &nodes); - virtual inline bool canReshape() const override { return false; } - virtual inline void reshape(ade::Graph&, const GCompileArgs&) override - { - // FIXME: CPU plugin is in fact reshapeable (as it was initially, - // even before outMeta() has been introduced), so this limitation - // should be dropped. - util::throw_error(std::logic_error("GCPUExecutable::reshape() should never be called")); - } + virtual inline bool canReshape() const override { return true; } + virtual void reshape(ade::Graph&, const GCompileArgs&) override; virtual void handleNewStream() override; diff --git a/modules/gapi/src/backends/fluid/gfluidbackend.cpp b/modules/gapi/src/backends/fluid/gfluidbackend.cpp index 0e33ca9c0f..ed4dda7d49 100644 --- a/modules/gapi/src/backends/fluid/gfluidbackend.cpp +++ b/modules/gapi/src/backends/fluid/gfluidbackend.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "precomp.hpp" @@ -954,7 +954,7 @@ namespace GFluidModel fg(graph); for (const auto& node : g.nodes()) { - if (g.metadata(node).get().t == NodeType::DATA) + if (fg.metadata(node).contains()) { auto& fd = fg.metadata(node).get(); fd.latency = 0; diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 22f73e553c..7a8f1f5ed8 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -378,271 +378,11 @@ CV_ALWAYS_INLINE int absdiff_simd(const T in1[], const T in2[], T out[], int len return 0; } - -template -CV_ALWAYS_INLINE int add_simd_sametype(const T in1[], const T in2[], T out[], int length) -{ - constexpr int nlanes = static_cast(VT::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - VT a = vx_load(&in1[x]); - VT b = vx_load(&in2[x]); - vx_store(&out[x], a + b); - } - - if (x < length && (in1 != out) && (in2 != out)) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; -} - -template -CV_ALWAYS_INLINE int add_simd(const SRC in1[], const SRC in2[], DST out[], int length) -{ - if (std::is_same::value && !std::is_same::value) - return 0; - - if (std::is_same::value) - { - if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return add_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); - v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); - v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); - v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 + b1, a2 + b2)); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); - v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); - v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); - v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); - - v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); - v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); - v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); - v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 + b1), v_round(a2 + b2)), - v_pack(v_round(a3 + b3), v_round(a4 + b4)))); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - - return 0; -} - -template -CV_ALWAYS_INLINE int sub_simd_sametype(const T in1[], const T in2[], T out[], int length) -{ - constexpr int nlanes = static_cast(VT::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - VT a = vx_load(&in1[x]); - VT b = vx_load(&in2[x]); - vx_store(&out[x], a - b); - } - - if (x < length && (in1 != out) && (in2 != out)) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; -} - -template -CV_ALWAYS_INLINE int sub_simd(const SRC in1[], const SRC in2[], DST out[], int length) -{ - if (std::is_same::value && !std::is_same::value) - return 0; - - if (std::is_same::value) - { - if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - else if (std::is_same::value) - { - return sub_simd_sametype(reinterpret_cast(in1), - reinterpret_cast(in2), - reinterpret_cast(out), length); - } - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_int16 a1 = vx_load(reinterpret_cast(&in1[x])); - v_int16 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 2])); - v_int16 b1 = vx_load(reinterpret_cast(&in2[x])); - v_int16 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 2])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(a1 - b1, a2 - b2)); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - else if (std::is_same::value && std::is_same::value) - { - constexpr int nlanes = static_cast(v_uint8::nlanes); - - if (length < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vx_load(reinterpret_cast(&in1[x])); - v_float32 a2 = vx_load(reinterpret_cast(&in1[x + nlanes / 4])); - v_float32 a3 = vx_load(reinterpret_cast(&in1[x + 2 * nlanes / 4])); - v_float32 a4 = vx_load(reinterpret_cast(&in1[x + 3 * nlanes / 4])); - - v_float32 b1 = vx_load(reinterpret_cast(&in2[x])); - v_float32 b2 = vx_load(reinterpret_cast(&in2[x + nlanes / 4])); - v_float32 b3 = vx_load(reinterpret_cast(&in2[x + 2 * nlanes / 4])); - v_float32 b4 = vx_load(reinterpret_cast(&in2[x + 3 * nlanes / 4])); - - vx_store(reinterpret_cast(&out[x]), v_pack_u(v_pack(v_round(a1 - b1), v_round(a2 - b2)), - v_pack(v_round(a3 - b3), v_round(a4 - b4)))); - } - - if (x < length) - { - CV_DbgAssert((reinterpret_cast(in1) != reinterpret_cast(out)) && - (reinterpret_cast(in2) != reinterpret_cast(out))); - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - - return x; - } - - return 0; -} #endif // CV_SIMD template -static CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const View &src2, - Arithm arithm, double scale=1) +CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const View &src2, + Arithm arithm, double scale=1) { static_assert(std::is_same::value, "wrong types"); @@ -652,7 +392,7 @@ static CV_ALWAYS_INLINE void run_arithm(Buffer &dst, const View &src1, const Vie int width = dst.length(); int chan = dst.meta().chan; - int length = width * chan; + const int length = width * chan; // NB: assume in/out types are not 64-bits float _scale = static_cast( scale ); @@ -708,13 +448,22 @@ GAPI_FLUID_KERNEL(GFluidAdd, cv::gapi::core::GAdd, false) static void run(const View &src1, const View &src2, int /*dtype*/, Buffer &dst) { // DST SRC1 SRC2 OP __VA_ARGS__ - BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); - BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(uchar, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(short, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(ushort, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, short, short, run_arithm, dst, src1, src2, ARITHM_ADD); + BINARY_(float, float, float, run_arithm, dst, src1, src2, ARITHM_ADD); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } @@ -728,10 +477,19 @@ GAPI_FLUID_KERNEL(GFluidSub, cv::gapi::core::GSub, false) { // DST SRC1 SRC2 OP __VA_ARGS__ BINARY_(uchar , uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_(uchar , short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_(uchar , float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_( short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); - BINARY_( float, uchar , uchar , run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(uchar, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(short, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(ushort, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(float, uchar, uchar, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); + BINARY_(float, ushort, ushort, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); BINARY_( float, short, short, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); BINARY_( float, float, float, run_arithm, dst, src1, src2, ARITHM_SUBTRACT); @@ -936,8 +694,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca } template -static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm, - float scale=1) +CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[], + Arithm arithm, float scale=1) { const auto *in = src.InLine(0); auto *out = dst.OutLine(); @@ -955,15 +713,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A w = subrc_simd(scalar, in, out, length, chan); #endif for (; w < length; ++w) + { out[w] = subr(in[w], scalar[w % chan]); + } break; } - // TODO: optimize division case ARITHM_DIVIDE: - for (int w=0; w < width; w++) - for (int c=0; c < chan; c++) - out[chan*w + c] = div(scalar[c], in[chan*w + c], scale); + { + int w = 0; +#if CV_SIMD + w = divrc_simd(scalar, in, out, length, chan, scale); +#endif + for (; w < length; ++w) + { + out[w] = div(scalar[w % chan], in[w], scale); + } break; + } default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation"); } } @@ -1319,7 +1085,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch, #endif for (; w < length; ++w) + { out[w] = div(in[w], scalar[w % chan], scale); + } } GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) @@ -1402,32 +1170,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true) } }; -GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false) +GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true) { static const int Window = 1; - static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/, - Buffer &dst) + static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/, + Buffer& dst, Buffer& scratch) { - const float scalar[4] = { - static_cast(_scalar[0]), - static_cast(_scalar[1]), - static_cast(_scalar[2]), - static_cast(_scalar[3]) - }; + GAPI_Assert(src.meta().chan <= 4); + + if (dst.y() == 0) + { + const int chan = src.meta().chan; + float* _scratch = scratch.OutLine(); + + scalar_to_scratch(_scalar, _scratch, scratch.length(), chan); + } + + const float* scalar = scratch.OutLine(); const float scale = static_cast(_scale); // DST SRC OP __VA_ARGS__ - UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); - UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); + UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } + + static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch) + { + initScratchBuffer(scratch); + } + + static void resetScratch(Buffer& /*scratch*/) + { + } }; //------------------- @@ -1764,102 +1555,43 @@ GAPI_FLUID_KERNEL(GFluidLUT, cv::gapi::core::GLUT, false) // //------------------------- -#if CV_SIMD128 -template -CV_ALWAYS_INLINE int run_convertto_simd(DST*, const SRC*, int) +template +CV_ALWAYS_INLINE void convertto_impl(const T in[], T out[], const int length) { - return 0; + memcpy(out, in, length * sizeof(T)); } -CV_ALWAYS_INLINE int run_convertto_simd(uchar *out, const float *in, const int length) + +template +CV_ALWAYS_INLINE void convertto_impl(const SRC in[], DST out[], const int length) { - int l = 0; - for (; l <= length - 16; l += 16) - { - v_int32x4 i0, i1, i2, i3; - i0 = v_round( v_load( (float*)& in[l ] ) ); - i1 = v_round( v_load( (float*)& in[l + 4] ) ); - i2 = v_round( v_load( (float*)& in[l + 8] ) ); - i3 = v_round( v_load( (float*)& in[l + 12] ) ); - - v_uint16x8 us0, us1; - us0 = v_pack_u(i0, i1); - us1 = v_pack_u(i2, i3); - - v_uint8x16 uc; - uc = v_pack(us0, us1); - v_store((uchar*)& out[l], uc); - } - return l; -} -CV_ALWAYS_INLINE int run_convertto_simd(ushort *out, const float *in, const int length) -{ - int l = 0; - for (; l <= length - 8; l += 8) - { - v_int32x4 i0, i1; - i0 = v_round( v_load( (float*)& in[l ] ) ); - i1 = v_round( v_load( (float*)& in[l + 4] ) ); - - v_uint16x8 us; - us = v_pack_u(i0, i1); - v_store((ushort*)& out[l], us); - } - return l; -} -#endif - -template::value && - std::is_floating_point::value, bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) -{ - // manual SIMD if need rounding - static_assert(std::is_same::value, "64-bit floating-point source is not supported"); - int l = 0; // cycle index -#if CV_SIMD128 - l = run_convertto_simd(out, in, length); + int x = 0; +#if CV_SIMD + x = convertto_simd(in, out, length); #endif // tail of SIMD cycle - for (; l < length; l++) + for (; x < length; ++x) { - out[l] = saturate(in[l], rintf); + out[x] = saturate(in[x], rintf); } } -template::value && - std::is_integral::value , bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) + +template +CV_ALWAYS_INLINE void convertto_impl(const SRC *in, DST* out, const float alpha, const float beta, + const int length) { - for (int l = 0; l < length; l++) + int x = 0; +#if CV_SIMD + x = convertto_scaled_simd(in, out, alpha, beta, length); +#endif + + for (; x < length; ++x) { - out[l] = saturate(in[l]); - } -} -template::value, bool> = true > -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const int length) -{ - static_assert(!std::is_same::value, "64-bit floating-point source is not supported"); - for (int l = 0; l < length; l++) - { - out[l] = static_cast(in[l]); + out[x] = saturate(in[x] * alpha + beta, rintf); } } template -CV_ALWAYS_INLINE void run_convertto(DST *out, const SRC *in, const float alpha, const float beta, - const int length) -{ - static_assert(!std::is_same::value, "64-bit floating-point source is not supported"); - // TODO: optimize if alpha and beta and data are integral - for (int l = 0; l < length; l++) - { - out[l] = saturate(in[l] * alpha + beta, rintf); - } -} - -template -static void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta) +CV_ALWAYS_INLINE void run_convertto(Buffer &dst, const View &src, double _alpha, double _beta) { const auto *in = src.InLine(0); auto *out = dst.OutLine(); @@ -1873,13 +1605,13 @@ static void run_convertto(Buffer &dst, const View &src, double _alpha, double _b const auto beta = static_cast( _beta ); // compute faster if no alpha no beta - if (1.f == alpha && 0.f == beta) + if ((std::fabs(alpha - 1.f) < FLT_EPSILON) && (std::fabs(beta) < FLT_EPSILON)) { - run_convertto(out, in, length); + convertto_impl(in, out, length); } else // if alpha or beta is non-trivial { - run_convertto(out, in, alpha, beta, length); + convertto_impl(in, out, alpha, beta, length); } } @@ -1890,22 +1622,22 @@ GAPI_FLUID_KERNEL(GFluidConvertTo, cv::gapi::core::GConvertTo, false) static void run(const View &src, int /*rtype*/, double alpha, double beta, Buffer &dst) { // DST SRC OP __VA_ARGS__ - UNARY_(uchar , uchar , run_convertto, dst, src, alpha, beta); - UNARY_(uchar , ushort, run_convertto, dst, src, alpha, beta); - UNARY_(uchar , short, run_convertto, dst, src, alpha, beta); - UNARY_(uchar , float, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(uchar, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, short, run_convertto, dst, src, alpha, beta); + UNARY_(uchar, float, run_convertto, dst, src, alpha, beta); UNARY_(ushort, uchar , run_convertto, dst, src, alpha, beta); UNARY_(ushort, ushort, run_convertto, dst, src, alpha, beta); UNARY_(ushort, short, run_convertto, dst, src, alpha, beta); UNARY_(ushort, float, run_convertto, dst, src, alpha, beta); - UNARY_( short, uchar , run_convertto, dst, src, alpha, beta); - UNARY_( short, ushort, run_convertto, dst, src, alpha, beta); - UNARY_( short, short, run_convertto, dst, src, alpha, beta); - UNARY_( short, float, run_convertto, dst, src, alpha, beta); - UNARY_( float, uchar , run_convertto, dst, src, alpha, beta); - UNARY_( float, ushort, run_convertto, dst, src, alpha, beta); - UNARY_( float, short, run_convertto, dst, src, alpha, beta); - UNARY_( float, float, run_convertto, dst, src, alpha, beta); + UNARY_(short, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(short, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(short, short, run_convertto, dst, src, alpha, beta); + UNARY_(short, float, run_convertto, dst, src, alpha, beta); + UNARY_(float, uchar , run_convertto, dst, src, alpha, beta); + UNARY_(float, ushort, run_convertto, dst, src, alpha, beta); + UNARY_(float, short, run_convertto, dst, src, alpha, beta); + UNARY_(float, float, run_convertto, dst, src, alpha, beta); CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 9afac9ceb4..c9d329b2ff 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale) \ +{ \ + CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width) { @@ -263,6 +290,107 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], CV_CPU_DISPATCH_MODES_ALL); } +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + CV_CPU_DISPATCH(add_simd, (in1, in2, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + CV_CPU_DISPATCH(sub_simd, (in1, in2, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length) \ +{ \ + CV_CPU_DISPATCH(convertto_simd, (in, out, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length) \ +{ \ + CV_CPU_DISPATCH(convertto_scaled_simd, (in, out, alpha, beta, length), \ + CV_CPU_DISPATCH_MODES_ALL); \ +} + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index 868923932d..81aa098b64 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale); + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -199,6 +222,91 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length); + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length); + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index 2424a57677..d1fe33fa2e 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -150,8 +150,8 @@ SUBRC_SIMD(float, float) #undef SUBRC_SIMD -#define MULC_SIMD(SRC, DST) \ -int mulc_simd(const SRC in[], const float scalar[], DST out[], \ +#define MULC_SIMD(SRC, DST) \ +int mulc_simd(const SRC in[], const float scalar[], DST out[], \ const int length, const int chan, const float scale); MULC_SIMD(uchar, uchar) @@ -173,9 +173,9 @@ MULC_SIMD(float, float) #undef MULC_SIMD -#define DIVC_SIMD(SRC, DST) \ -int divc_simd(const SRC in[], const float scalar[], DST out[], \ - const int length, const int chan, const float scale, \ +#define DIVC_SIMD(SRC, DST) \ +int divc_simd(const SRC in[], const float scalar[], DST out[], \ + const int length, const int chan, const float scale, \ const int set_mask_flag); DIVC_SIMD(uchar, uchar) @@ -208,6 +208,114 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale); + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length); + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length); + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length); + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], const int width); @@ -222,6 +330,11 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#define SRC_SHORT_OR_USHORT std::is_same::value || std::is_same::value +#define DST_SHORT_OR_USHORT std::is_same::value || std::is_same::value +#define SRC_DST_SHORT_AND_USHORT (std::is_same::value && std::is_same::value) || (std::is_same::value && std::is_same::value) +#define SRC_DST_SHORT_OR_USHORT (std::is_same::value && std::is_same::value) || (std::is_same::value && std::is_same::value) + struct scale_tag {}; struct not_scale_tag {}; @@ -236,6 +349,28 @@ template<> struct vector_type_of { using type = v_uint16; }; template<> struct vector_type_of { using type = v_int16; }; template<> struct vector_type_of { using type = v_float32; }; +template +struct zero_vec_type_of; + +template +using zero_vec_type_of_t = typename zero_vec_type_of::type; + +template<> struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_int16; }; +template<> struct zero_vec_type_of { using type = v_float32; }; + +template +struct univ_zero_vec_type_of; + +template +using univ_zero_vec_type_of_t = typename univ_zero_vec_type_of::type; + +template<> struct univ_zero_vec_type_of { using type = v_uint8; }; +template<> struct univ_zero_vec_type_of { using type = v_int16; }; +template<> struct univ_zero_vec_type_of { using type = v_int16; }; +template<> struct univ_zero_vec_type_of { using type = v_float32; }; + CV_ALWAYS_INLINE v_float32 vg_load_f32(const float* in) { return vx_load(in); @@ -295,165 +430,111 @@ CV_ALWAYS_INLINE void v_store_select(short* dst, const v_int16& div, const v_int CV_ALWAYS_INLINE void v_store_select(ushort* dst, const v_int16& div, const v_int16& v_zero, const v_int32& res1, const v_int32& res2) { - v_uint16 sel = v_reinterpret_as_u16(v_select(div == v_zero, v_zero, v_pack(res1, res2))); - vx_store(dst, sel); + vx_store(dst, v_select(v_reinterpret_as_u16(div == v_zero), + v_reinterpret_as_u16(v_zero), v_pack_u(res1, res2))); } -//================================================================================================= +//============================================================================= -template +template CV_ALWAYS_INLINE -typename std::enable_if<(std::is_same::value && std::is_same::value) || - (std::is_same::value && std::is_same::value) || - (std::is_same::value && std::is_same::value), int>::type -div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], DST out[], const int length, double _scale) +void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const v_float32& a3, const v_float32& a4, const uchar* in2x, + uchar* outx, const v_float32& v_scale, const v_int16& v_zero) { - constexpr int nlanes = vector_type_of_t::nlanes; + constexpr int nlanes = v_uint8::nlanes; - if (length < nlanes) - return 0; + v_int16 div1 = v_reinterpret_as_s16(vx_load_expand(in2x)); + v_int16 div2 = v_reinterpret_as_s16(vx_load_expand(&in2x[nlanes/2])); - v_int16 v_zero = vx_setall_s16(0); - v_float32 scale = vx_setall_f32(static_cast(_scale)); + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); + v_int32 sum1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)), + sum2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)), + sum3 = v_round(div_op(s_tag, a3, fdiv3, v_scale)), + sum4 = v_round(div_op(s_tag, a4, fdiv4, v_scale)); - v_int16 div = v_reinterpret_as_s16(vx_load(&in2[x])); + v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); + v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - - v_int32 r1 = v_round(div_op(t, a1, fdiv1, scale)); - v_int32 r2 = v_round(div_op(t, a2, fdiv2, scale)); - - v_store_select(&out[x], div, v_zero, r1, r2); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + vx_store(outx, v_pack_u(res1, res2)); } -//------------------------------------------------------------------------------------------------- - template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], uchar out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const v_float32& a3, const v_float32& a4, const SRC* in2x, + uchar* outx, const v_float32& v_scale, const v_int16& v_zero) { constexpr int nlanes = v_uint8::nlanes; - if (length < nlanes) - return 0; + v_int16 div1 = v_reinterpret_as_s16(vx_load(in2x)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&in2x[nlanes/2])); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_int16 v_zero = vx_setall_s16(0); + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 4]); - v_float32 a3 = vg_load_f32(&in1[x + nlanes / 2]); - v_float32 a4 = vg_load_f32(&in1[x + 3 * nlanes / 4]); + v_int32 sum1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)), + sum2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)), + sum3 = v_round(div_op(s_tag, a3, fdiv3, v_scale)), + sum4 = v_round(div_op(s_tag, a4, fdiv4, v_scale)); - v_int16 div1 = v_reinterpret_as_s16(vx_load(&in2[x])); - v_int16 div2 = v_reinterpret_as_s16(vx_load(&in2[x + nlanes/2])); + v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); + v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); - v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); - v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); - - v_int32 sum1 = v_round(div_op(t, a1, fdiv1, scale)), - sum2 = v_round(div_op(t, a2, fdiv2, scale)), - sum3 = v_round(div_op(t, a3, fdiv3, scale)), - sum4 = v_round(div_op(t, a4, fdiv4, scale)); - - v_int16 res1 = v_select((div1 == v_zero), v_zero, v_pack(sum1, sum2)); - v_int16 res2 = v_select((div2 == v_zero), v_zero, v_pack(sum3, sum4)); - - vx_store(&out[x], v_pack_u(res1, res2)); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + vx_store(outx, v_pack_u(res1, res2)); } -//------------------------------------------------------------------------------------------------- - template -CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const float in1[], const float in2[], uchar out[], - const int length, double _scale) +CV_ALWAYS_INLINE void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, + const v_float32& a2, const v_float32& a3, + const v_float32& a4, const float* in2x, uchar* outx, + const v_float32& v_scale, const v_float32& v_zero) { constexpr int nlanes = v_uint8::nlanes; - if (length < nlanes) - return 0; + v_float32 div1 = vg_load_f32(in2x); + v_float32 div2 = vg_load_f32(&in2x[nlanes / 4]); + v_float32 div3 = vg_load_f32(&in2x[nlanes / 2]); + v_float32 div4 = vg_load_f32(&in2x[3 * nlanes / 4]); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_float32 v_zero = vx_setall_f32(0); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 4]); - v_float32 a3 = vg_load_f32(&in1[x + nlanes / 2]); - v_float32 a4 = vg_load_f32(&in1[x + 3 * nlanes / 4]); + v_float32 r1 = div_op(s_tag, a1, div1, v_scale); + v_float32 r2 = div_op(s_tag, a2, div2, v_scale); + v_float32 r3 = div_op(s_tag, a3, div3, v_scale); + v_float32 r4 = div_op(s_tag, a4, div4, v_scale); - v_float32 div1 = vg_load_f32(&in2[x]); - v_float32 div2 = vg_load_f32(&in2[x + nlanes / 4]); - v_float32 div3 = vg_load_f32(&in2[x + nlanes / 2]); - v_float32 div4 = vg_load_f32(&in2[x + 3 * nlanes / 4]); + v_float32 sel1 = v_select((div1 == v_zero), v_zero, r1); + v_float32 sel2 = v_select((div2 == v_zero), v_zero, r2); + v_float32 sel3 = v_select((div3 == v_zero), v_zero, r3); + v_float32 sel4 = v_select((div4 == v_zero), v_zero, r4); - v_float32 r1 = div_op(t, a1, div1, scale); - v_float32 r2 = div_op(t, a2, div2, scale); - v_float32 r3 = div_op(t, a3, div3, scale); - v_float32 r4 = div_op(t, a4, div4, scale); + v_int32 res1 = v_round(sel1); + v_int32 res2 = v_round(sel2); + v_int32 res3 = v_round(sel3); + v_int32 res4 = v_round(sel4); - v_float32 sel1 = v_select((div1 == v_zero), v_zero, r1); - v_float32 sel2 = v_select((div2 == v_zero), v_zero, r2); - v_float32 sel3 = v_select((div3 == v_zero), v_zero, r3); - v_float32 sel4 = v_select((div4 == v_zero), v_zero, r4); + vx_store(outx, v_pack_u(v_pack(res1, res2), v_pack(res3, res4))); +} - v_int32 res1 = v_round(sel1); - v_int32 res2 = v_round(sel2); - v_int32 res3 = v_round(sel3); - v_int32 res4 = v_round(sel4); +template +CV_ALWAYS_INLINE void div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, uchar* outx, + const v_float32& v_scale, const Vtype& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; - vx_store(&out[x], v_pack_u(v_pack(res1, res2), v_pack(res3, res4))); - } + v_float32 a1 = vg_load_f32(in1x); + v_float32 a2 = vg_load_f32(&in1x[nlanes / 4]); + v_float32 a3 = vg_load_f32(&in1x[nlanes / 2]); + v_float32 a4 = vg_load_f32(&in1x[3 * nlanes / 4]); - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + div_simd_impl(s_tag, a1, a2, a3, a4, in2x, outx, v_scale, v_zero); } //------------------------------------------------------------------------------------------------- @@ -461,113 +542,117 @@ CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const float in1[], const float in2[] template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const uchar in1[], const uchar in2[], DST out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const uchar* in2x, DST* outx, const v_float32& v_scale, + const v_int16& v_zero) { - constexpr int nlanes = vector_type_of_t::nlanes; + v_int16 div = v_reinterpret_as_s16(vx_load_expand(in2x)); - if (length < nlanes) - return 0; + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_int16 v_zero = vx_setall_s16(0); + v_int32 r1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)); + v_int32 r2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); - - v_int16 div = v_reinterpret_as_s16(vx_load_expand(&in2[x])); - - v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); - v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); - - v_int32 r1 = v_round(div_op(t, a1, fdiv1, scale)); - v_int32 r2 = v_round(div_op(t, a2, fdiv2, scale)); - - v_store_select(&out[x], div, v_zero, r1, r2); - } - - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + v_store_select(outx, div, v_zero, r1, r2); } -//------------------------------------------------------------------------------------------------- +template +CV_ALWAYS_INLINE +typename std::enable_if<(std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value), void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const SRC* in2x, DST* outx, const v_float32& v_scale, const v_int16& v_zero) +{ + v_int16 div = v_reinterpret_as_s16(vx_load(in2x)); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div)); + + v_int32 r1 = v_round(div_op(s_tag, a1, fdiv1, v_scale)); + v_int32 r2 = v_round(div_op(s_tag, a2, fdiv2, v_scale)); + + v_store_select(outx, div, v_zero, r1, r2); +} template CV_ALWAYS_INLINE typename std::enable_if::value || - std::is_same::value, int>::type -div_hal(scale_tag_t t, const float in1[], const float in2[], DST out[], const int length, double _scale) + std::is_same::value, void>::type +div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const v_float32& a2, + const float* in2x, DST* outx, const v_float32& v_scale, + const v_float32& v_zero) { constexpr int nlanes = vector_type_of_t::nlanes; - if (length < nlanes) - return 0; + v_float32 fdiv1 = vg_load_f32(in2x); + v_float32 fdiv2 = vg_load_f32(&in2x[nlanes / 2]); - v_float32 scale = vx_setall_f32(static_cast(_scale)); - v_float32 v_zero = vx_setall_f32(0); - int x = 0; - for (;;) - { - for (; x <= length - nlanes; x += nlanes) - { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 a2 = vg_load_f32(&in1[x + nlanes / 2]); + v_float32 r1 = div_op(s_tag, a1, fdiv1, v_scale); + v_float32 r2 = div_op(s_tag, a2, fdiv2, v_scale); - v_float32 fdiv1 = vg_load_f32(&in2[x]); - v_float32 fdiv2 = vg_load_f32(&in2[x + nlanes / 2]); + v_int32 res1 = v_round(v_select((fdiv1 == v_zero), v_zero, r1)); + v_int32 res2 = v_round(v_select((fdiv2 == v_zero), v_zero, r2)); - v_float32 r1 = div_op(t, a1, fdiv1, scale); - v_float32 r2 = div_op(t, a2, fdiv2, scale); + v_store_i16(outx, res1, res2); +} - v_int32 res1 = v_round(v_select((fdiv1 == v_zero), v_zero, r1)); - v_int32 res2 = v_round(v_select((fdiv2 == v_zero), v_zero, r2)); +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, DST* outx, + const v_float32& v_scale, const Vtype& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; - v_store_i16(&out[x], res1, res2); - } + v_float32 a1 = vg_load_f32(in1x); + v_float32 a2 = vg_load_f32(&in1x[nlanes / 2]); - if (x < length) - { - x = length - nlanes; - continue; // process one more time (unaligned tail) - } - break; - } - return x; + div_simd_impl(s_tag, a1, a2, in2x, outx, v_scale, v_zero); } //------------------------------------------------------------------------------------------------- template -CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], float out[], - const int length, double _scale) +CV_ALWAYS_INLINE void div_simd_impl(scale_tag_t s_tag, const v_float32& a1, const SRC* in2x, + float* outx, const v_float32& v_scale) { - constexpr int nlanes = v_float32::nlanes; + v_float32 b1 = vg_load_f32(in2x); + vx_store(outx, div_op(s_tag, a1, b1, v_scale)); +} + +template +CV_ALWAYS_INLINE void div_hal(scale_tag_t s_tag, const SRC* in1x, const SRC* in2x, float* outx, + const v_float32& v_scale, const Tvec&) +{ + v_float32 a1 = vg_load_f32(in1x); + div_simd_impl(s_tag, a1, in2x, outx, v_scale); +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int div_simd_common(scale_tag_t s_tag, const SRC in1[], const SRC in2[], + DST out[], const int length, float scale) +{ + constexpr int nlanes = vector_type_of_t::nlanes; if (length < nlanes) return 0; - v_float32 scale = vx_setall_f32(static_cast(_scale)); + const zero_vec_type_of_t v_zero = vx_setall::lane_type>(0); + v_float32 v_scale = vx_setall_f32(scale); int x = 0; for (;;) { for (; x <= length - nlanes; x += nlanes) { - v_float32 a1 = vg_load_f32(&in1[x]); - v_float32 b1 = vg_load_f32(&in2[x]); - - vx_store(&out[x], div_op(t, a1, b1, scale)); + div_hal(s_tag, &in1[x], &in2[x], &out[x], v_scale, v_zero); } if (x < length) @@ -580,28 +665,6 @@ CV_ALWAYS_INLINE int div_hal(scale_tag_t t, const SRC in1[], const SRC in2[], fl return x; } -//------------------------------------------------------------------------------------------------- - -template -CV_ALWAYS_INLINE int div_hal(scale_tag_t, const uchar in1[], const uchar in2[], uchar out[], - const int length, double scale) -{ - hal::div8u(in1, static_cast(length), in2, static_cast(length), - out, static_cast(length), length, 1, &scale); - return length; -} - -template -CV_ALWAYS_INLINE int div_hal(scale_tag_t, const short in1[], const short in2[], short out[], - const int length, double scale) -{ - hal::div16s(in1, static_cast(length), in2, static_cast(length), - out, static_cast(length), length, 1, &scale); - return length; -} - -//------------------------------------------------------------------------------------------------- - #define DIV_SIMD(SRC, DST) \ int div_simd(const SRC in1[], const SRC in2[], DST out[], \ const int length, double _scale) \ @@ -610,13 +673,11 @@ int div_simd(const SRC in1[], const SRC in2[], DST out[], float fscale = static_cast(_scale); \ if (std::fabs(fscale - 1.0f) <= FLT_EPSILON) \ { \ - not_scale_tag t; \ - x = div_hal(t, in1, in2, out, length, _scale); \ + x = div_simd_common(not_scale_tag{}, in1, in2, out, length, fscale); \ } \ else \ { \ - scale_tag t; \ - x = div_hal(t, in1, in2, out, length, _scale); \ + x = div_simd_common(scale_tag{}, in1, in2, out, length, fscale); \ } \ return x; \ } @@ -1553,7 +1614,7 @@ int mulc_simd(const SRC in[], const float scalar[], DST out[], \ else \ { \ return arithmOpScalarScaled_simd_common(op_t, in, scalar, out, \ - length, scale); \ + length, scale); \ } \ } \ case 3: \ @@ -1743,11 +1804,11 @@ divc_simd_c3_impl(scale_tag_t s_tag, SRC in[], DST out[], const v_float32& s1, v_float32 a6 = vg_load_f32(&in[x + 5 * nlanes / 2]); arithmOpScalar_pack_store_c3(&out[x], v_round(v_select(v_mask1, v_zero, div_op(s_tag, a1, s1, v_scale))), - v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))), - v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))), - v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))), - v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))), - v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, s3, v_scale)))); + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a2, s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, a3, s3, v_scale))), + v_round(v_select(v_mask1, v_zero, div_op(s_tag, a4, s1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, a5, s2, v_scale))), + v_round(v_select(v_mask3, v_zero, div_op(s_tag, a6, s3, v_scale)))); } if (x < length) @@ -1976,14 +2037,432 @@ ABSDIFFC_SIMD(float) #undef ABSDIFFC_SIMD +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec& v_zero) +{ + div_simd_impl(s_tag, v_scalar, v_scalar, inx, outx, v_scale, v_zero); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec& v_zero) +{ + div_simd_impl(s_tag, v_scalar, v_scalar, v_scalar, v_scalar, inx, outx, v_scale, v_zero); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +divrc_simd_common_impl(scale_tag_t s_tag, const SRC* inx, + const v_float32& v_scalar, DST* outx, + const v_float32& v_scale, const Tvec&) +{ + div_simd_impl(s_tag, v_scalar, inx, outx, v_scale); +} + +template +CV_ALWAYS_INLINE int divrc_simd_common(scale_tag_t s_tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + if (length < nlanes) + return 0; + + v_float32 v_scalar = vx_load(scalar); + v_float32 v_scale = vx_setall_f32(scale); + zero_vec_type_of_t v_zero = + vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + divrc_simd_common_impl(s_tag, &in[x], v_scalar, &out[x], v_scale, v_zero); + } + + if (x < length) + { + x = length - nlanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE void divrc_simd_c3_calc(scale_tag_t s_tag, const uchar* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_uint8& v_zero) +{ + v_uint8 div = vx_load(inx); + v_uint8 v_mask = (div == v_zero); + + v_uint16 div1 = v_expand_low(div); + v_uint16 div2 = v_expand_high(div); + + v_float32 fdiv1 = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(div1))); + v_float32 fdiv2 = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(div1))); + v_float32 fdiv3 = v_cvt_f32(v_reinterpret_as_s32(v_expand_low(div2))); + v_float32 fdiv4 = v_cvt_f32(v_reinterpret_as_s32(v_expand_high(div2))); + + vx_store(outx, + v_select(v_mask, v_zero, v_pack_u(v_pack(v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))), + v_pack(v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale)))))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const SRC* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int16 div1 = v_reinterpret_as_s16(vx_load(inx)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&inx[nlanes / 2])); + + v_int16 v_mask1 = (div1 == v_zero); + v_int16 v_mask2 = (div2 == v_zero); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + + vx_store(outx, + v_pack_u(v_select(v_mask1, v_zero, + v_pack(v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale)))), + v_select(v_mask2, v_zero, + v_pack(v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale)))))); +} + +template +CV_ALWAYS_INLINE void divrc_simd_c3_calc(scale_tag_t s_tag, const float* inx, uchar* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_float32& v_zero) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 fdiv1 = vg_load_f32(inx); + v_float32 fdiv2 = vg_load_f32(&inx[nlanes / 4]); + v_float32 fdiv3 = vg_load_f32(&inx[nlanes / 2]); + v_float32 fdiv4 = vg_load_f32(&inx[3 * nlanes / 4]); + + v_float32 v_mask1 = (fdiv1 == v_zero); + v_float32 v_mask2 = (fdiv2 == v_zero); + v_float32 v_mask3 = (fdiv3 == v_zero); + v_float32 v_mask4 = (fdiv4 == v_zero); + + vx_store(outx, + v_pack_u(v_pack(v_round(v_select(v_mask1, v_zero, div_op(s_tag, s1, fdiv1, v_scale))), + v_round(v_select(v_mask2, v_zero, div_op(s_tag, s2, fdiv2, v_scale)))), + v_pack(v_round(v_select(v_mask3, v_zero, div_op(s_tag, s3, fdiv3, v_scale))), + v_round(v_select(v_mask4, v_zero, div_op(s_tag, s1, fdiv4, v_scale)))))); + +} + +template +CV_ALWAYS_INLINE int divrc_simd_c3_impl(scale_tag_t s_tag, const SRC in[], uchar out[], + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int length, const int nlanes, const int lanes) +{ + univ_zero_vec_type_of_t v_zero = + vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + divrc_simd_c3_calc(s_tag, &in[x], &out[x], s1, s2, s3, v_scale, v_zero); + divrc_simd_c3_calc(s_tag, &in[x + nlanes], &out[x + nlanes], s2, s3, s1, v_scale, v_zero); + divrc_simd_c3_calc(s_tag, &in[x + 2 * nlanes], &out[x + 2 * nlanes], s3, s1, s2, v_scale, v_zero); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const uchar* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + v_uint8 div = vx_load(inx); + + v_int16 div1 = v_reinterpret_as_s16(v_expand_low(div)); + v_int16 div2 = v_reinterpret_as_s16(v_expand_high(div)); + v_int16 div3 = v_reinterpret_as_s16(vx_load_expand(&inx[2 * nlanes])); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + v_float32 fdiv5 = v_cvt_f32(v_expand_low(div3)); + v_float32 fdiv6 = v_cvt_f32(v_expand_high(div3)); + + v_store_select(outx, div1, v_zero, v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))); + v_store_select(&outx[nlanes], div2, v_zero, v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale))); + v_store_select(&outx[2*nlanes], div3, v_zero, v_round(div_op(s_tag, s2, fdiv5, v_scale)), + v_round(div_op(s_tag, s3, fdiv6, v_scale))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if<(std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value) || + (std::is_same::value && std::is_same::value), void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const SRC* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_int16& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_int16 div1 = v_reinterpret_as_s16(vx_load(inx)); + v_int16 div2 = v_reinterpret_as_s16(vx_load(&inx[nlanes])); + v_int16 div3 = v_reinterpret_as_s16(vx_load(&inx[2*nlanes])); + + v_float32 fdiv1 = v_cvt_f32(v_expand_low(div1)); + v_float32 fdiv2 = v_cvt_f32(v_expand_high(div1)); + v_float32 fdiv3 = v_cvt_f32(v_expand_low(div2)); + v_float32 fdiv4 = v_cvt_f32(v_expand_high(div2)); + v_float32 fdiv5 = v_cvt_f32(v_expand_low(div3)); + v_float32 fdiv6 = v_cvt_f32(v_expand_high(div3)); + + v_store_select(outx, div1, v_zero, v_round(div_op(s_tag, s1, fdiv1, v_scale)), + v_round(div_op(s_tag, s2, fdiv2, v_scale))); + v_store_select(&outx[nlanes], div2, v_zero, v_round(div_op(s_tag, s3, fdiv3, v_scale)), + v_round(div_op(s_tag, s1, fdiv4, v_scale))); + v_store_select(&outx[2*nlanes], div3, v_zero, v_round(div_op(s_tag, s2, fdiv5, v_scale)), + v_round(div_op(s_tag, s3, fdiv6, v_scale))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +divrc_simd_c3_calc(scale_tag_t s_tag, const float* inx, DST* outx, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const v_float32& v_zero) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_float32 fdiv1 = vg_load_f32(inx); + v_float32 fdiv2 = vg_load_f32(&inx[nlanes/2]); + v_float32 fdiv3 = vg_load_f32(&inx[nlanes]); + v_float32 fdiv4 = vg_load_f32(&inx[3*nlanes/2]); + v_float32 fdiv5 = vg_load_f32(&inx[2*nlanes]); + v_float32 fdiv6 = vg_load_f32(&inx[5*nlanes/2]); + + v_store_i16(outx, v_round(v_select(fdiv1 == v_zero, v_zero, div_op(s_tag, s1, fdiv1, v_scale))), + v_round(v_select(fdiv2 == v_zero, v_zero, div_op(s_tag, s2, fdiv2, v_scale)))); + v_store_i16(&outx[nlanes], v_round(v_select(fdiv3 == v_zero, v_zero, div_op(s_tag, s3, fdiv3, v_scale))), + v_round(v_select(fdiv4 == v_zero, v_zero, div_op(s_tag, s1, fdiv4, v_scale)))); + v_store_i16(&outx[2*nlanes], v_round(v_select(fdiv5 == v_zero, v_zero, div_op(s_tag, s2, fdiv5, v_scale))), + v_round(v_select(fdiv6 == v_zero, v_zero, div_op(s_tag, s3, fdiv6, v_scale)))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, int>::type +divrc_simd_c3_impl(scale_tag_t s_tag, const SRC in[], DST out[], const v_float32& s1, + const v_float32& s2, const v_float32& s3, + const v_float32& v_scale, const int length, + const int, const int lanes) +{ + zero_vec_type_of_t v_zero = + vx_setall::lane_type>(0); + + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + divrc_simd_c3_calc(s_tag, &in[x], &out[x], s1, s2, s3, v_scale, v_zero); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divrc_simd_c3_impl(scale_tag_t s_tag, const SRC* in, float* out, + const v_float32& s1, const v_float32& s2, + const v_float32& s3, const v_float32& v_scale, + const int length, const int nlanes, const int lanes) +{ + int x = 0; + for (;;) + { + for (; x <= length - lanes; x += lanes) + { + v_float32 div1 = vg_load_f32(&in[x]); + v_float32 div2 = vg_load_f32(&in[x + nlanes]); + v_float32 div3 = vg_load_f32(&in[x + 2*nlanes]); + + vx_store(&out[x], div_op(s_tag, s1, div1, v_scale)); + vx_store(&out[x + nlanes], div_op(s_tag, s2, div2, v_scale)); + vx_store(&out[x + 2*nlanes], div_op(s_tag, s3, div3, v_scale)); + } + + if (x < length) + { + x = length - lanes; + continue; // process unaligned tail + } + break; + } + return x; +} + +//------------------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE int divrc_simd_c3(scale_tag_t s_tag, const SRC in[], + const float scalar[], DST out[], + const int length, const float scale) +{ + constexpr int chan = 3; + constexpr int nlanes = vector_type_of_t::nlanes; + constexpr int lanes = chan * nlanes; + + if (length < lanes) + return 0; + + v_float32 v_scale = vx_setall_f32(scale); + + v_float32 s1 = vx_load(scalar); +#if CV_SIMD_WIDTH == 32 + v_float32 s2 = vx_load(&scalar[2]); + v_float32 s3 = vx_load(&scalar[1]); +#else + v_float32 s2 = vx_load(&scalar[1]); + v_float32 s3 = vx_load(&scalar[2]); +#endif + return divrc_simd_c3_impl(s_tag, in, out, s1, s2, s3, v_scale, length, nlanes, lanes); +} + +#define DIVRC_SIMD(SRC, DST) \ +int divrc_simd(const float scalar[], const SRC in[], DST out[], \ + const int length, const int chan, const float scale) \ +{ \ + switch (chan) \ + { \ + case 1: \ + case 2: \ + case 4: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + return divrc_simd_common(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + } \ + else \ + { \ + return divrc_simd_common(scale_tag{}, in, scalar, out, \ + length, scale); \ + } \ + } \ + case 3: \ + { \ + if (std::fabs(scale - 1.0f) <= FLT_EPSILON) \ + { \ + return divrc_simd_c3(not_scale_tag{}, in, scalar, \ + out, length, scale); \ + } \ + else \ + { \ + return divrc_simd_c3(scale_tag{}, in, scalar, out, \ + length, scale); \ + } \ + } \ + default: \ + GAPI_Assert(chan <= 4); \ + break; \ + } \ + return 0; \ +} + +DIVRC_SIMD(uchar, uchar) +DIVRC_SIMD(ushort, uchar) +DIVRC_SIMD(short, uchar) +DIVRC_SIMD(float, uchar) +DIVRC_SIMD(short, short) +DIVRC_SIMD(ushort, short) +DIVRC_SIMD(uchar, short) +DIVRC_SIMD(float, short) +DIVRC_SIMD(ushort, ushort) +DIVRC_SIMD(uchar, ushort) +DIVRC_SIMD(short, ushort) +DIVRC_SIMD(float, ushort) +DIVRC_SIMD(uchar, float) +DIVRC_SIMD(ushort, float) +DIVRC_SIMD(short, float) +DIVRC_SIMD(float, float) + +#undef DIVRC_SIMD + //------------------------- // // Fluid kernels: Split3 // //------------------------- -int split3_simd(const uchar in[], uchar out1[], uchar out2[], - uchar out3[], const int width) +int split3_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], + const int width) { constexpr int nlanes = v_uint8::nlanes; if (width < nlanes) @@ -2114,6 +2593,545 @@ int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], return x; } +//------------------------- +// +// Fluid kernels: Add +// +//------------------------- +template +CV_ALWAYS_INLINE VT oper(add_tag, const VT& a, const VT& b) +{ + return a + b; +} + +template +CV_ALWAYS_INLINE VT oper(sub_tag, const VT& a, const VT& b) +{ + return a - b; +} + +CV_ALWAYS_INLINE void pack_store_uchar(uchar* outx, const v_uint16& c1, const v_uint16& c2) +{ + vx_store(outx, v_pack(c1, c2)); +} + +CV_ALWAYS_INLINE void pack_store_uchar(uchar* outx, const v_int16& c1, const v_int16& c2) +{ + vx_store(outx, v_pack_u(c1, c2)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value, void>::type +arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, DST* outx) +{ + vector_type_of_t a = vx_load(in1x); + vector_type_of_t b = vx_load(in2x); + vx_store(outx, oper(op, a, b)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + vector_type_of_t a1 = vx_load(in1x); + vector_type_of_t a2 = vx_load(&in1x[nlanes / 2]); + vector_type_of_t b1 = vx_load(in2x); + vector_type_of_t b2 = vx_load(&in2x[nlanes / 2]); + + pack_store_uchar(outx, oper(op, a1, b1), oper(op, a2, b2)); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const float* in1x, + const float* in2x, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 a1 = vx_load(in1x); + v_float32 a2 = vx_load(&in1x[nlanes / 4]); + v_float32 a3 = vx_load(&in1x[2 * nlanes / 4]); + v_float32 a4 = vx_load(&in1x[3 * nlanes / 4]); + + v_float32 b1 = vx_load(in2x); + v_float32 b2 = vx_load(&in2x[nlanes / 4]); + v_float32 b3 = vx_load(&in2x[2 * nlanes / 4]); + v_float32 b4 = vx_load(&in2x[3 * nlanes / 4]); + + vx_store(outx, v_pack_u(v_pack(v_round(oper(op, a1, b1)), v_round(oper(op, a2, b2))), + v_pack(v_round(oper(op, a3, b3)), v_round(oper(op, a4, b4))))); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const uchar* in1x, + const uchar* in2x, short* outx) +{ + v_int16 a = v_reinterpret_as_s16(vx_load_expand(in1x)); + v_int16 b = v_reinterpret_as_s16(vx_load_expand(in2x)); + + vx_store(outx, oper(op, a, b)); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const uchar* in1x, + const uchar* in2x, ushort* outx) +{ + v_uint16 a = vx_load_expand(in1x); + v_uint16 b = vx_load_expand(in2x); + + vx_store(outx, oper(op, a, b)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::value || + std::is_same::value, void>::type +arithmOp_simd_impl(oper_tag op, const float* in1x, const float* in2x, DST* outx) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + v_float32 a1 = vx_load(in1x); + v_float32 a2 = vx_load(&in1x[nlanes/2]); + v_float32 b1 = vx_load(in2x); + v_float32 b2 = vx_load(&in2x[nlanes/2]); + + v_store_i16(outx, v_round(oper(op, a1, b1)), v_round(oper(op, a2, b2))); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const short* in1x, + const short* in2x, ushort* outx) +{ + v_int16 a = vx_load(in1x); + v_int32 a1 = v_expand_low(a); + v_int32 a2 = v_expand_high(a); + + v_int16 b = vx_load(in2x); + v_int32 b1 = v_expand_low(b); + v_int32 b2 = v_expand_high(b); + + vx_store(outx, v_pack_u(oper(op, a1, b1), oper(op, a2, b2))); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const ushort* in1x, + const ushort* in2x, short* outx) +{ + v_int16 a = v_reinterpret_as_s16(vx_load(in1x)); + v_int32 a1 = v_expand_low(a); + v_int32 a2 = v_expand_high(a); + + v_int16 b = v_reinterpret_as_s16(vx_load(in2x)); + v_int32 b1 = v_expand_low(b); + v_int32 b2 = v_expand_high(b); + + vx_store(outx, v_pack(oper(op, a1, b1), oper(op, a2, b2))); +} + +template +CV_ALWAYS_INLINE void arithmOp_simd_impl(oper_tag op, const SRC* in1x, const SRC* in2x, float* outx) +{ + v_float32 a = vg_load_f32(in1x); + v_float32 b = vg_load_f32(in2x); + + vx_store(outx, oper(op, a, b)); +} + +template +CV_ALWAYS_INLINE int arithmOp_simd(oper_tag op, const SRC in1[], const SRC in2[], + DST out[], const int length) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + if (length < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= length - nlanes; x += nlanes) + { + arithmOp_simd_impl(op, &in1[x], &in2[x], &out[x]); + } + + if (x < length) + { + x = length - nlanes; + continue; + } + break; + } + + return x; +} + +#define ADD_SIMD(SRC, DST) \ +int add_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + return arithmOp_simd(add_tag{}, in1, in2, out, length); \ +} \ + +ADD_SIMD(uchar, uchar) +ADD_SIMD(ushort, uchar) +ADD_SIMD(short, uchar) +ADD_SIMD(float, uchar) +ADD_SIMD(short, short) +ADD_SIMD(ushort, short) +ADD_SIMD(uchar, short) +ADD_SIMD(float, short) +ADD_SIMD(ushort, ushort) +ADD_SIMD(uchar, ushort) +ADD_SIMD(short, ushort) +ADD_SIMD(float, ushort) +ADD_SIMD(uchar, float) +ADD_SIMD(ushort, float) +ADD_SIMD(short, float) +ADD_SIMD(float, float) + +#undef ADD_SIMD + +//------------------------- +// +// Fluid kernels: Sub +// +//------------------------- + +#define SUB_SIMD(SRC, DST) \ +int sub_simd(const SRC in1[], const SRC in2[], DST out[], const int length) \ +{ \ + return arithmOp_simd(sub_tag{}, in1, in2, out, length); \ +} \ + +SUB_SIMD(uchar, uchar) +SUB_SIMD(ushort, uchar) +SUB_SIMD(short, uchar) +SUB_SIMD(float, uchar) +SUB_SIMD(short, short) +SUB_SIMD(ushort, short) +SUB_SIMD(uchar, short) +SUB_SIMD(float, short) +SUB_SIMD(ushort, ushort) +SUB_SIMD(uchar, ushort) +SUB_SIMD(short, ushort) +SUB_SIMD(float, ushort) +SUB_SIMD(uchar, float) +SUB_SIMD(ushort, float) +SUB_SIMD(short, float) +SUB_SIMD(float, float) + +#undef SUB_SIMD + +//------------------------- +// +// Fluid kernels: ConvertTo +// +//------------------------- + +CV_ALWAYS_INLINE void store_i16(ushort* outx, const v_uint16& res) +{ + vx_store(outx, res); +} + +CV_ALWAYS_INLINE void store_i16(short* outx, const v_uint16& res) +{ + vx_store(outx, v_reinterpret_as_s16(res)); +} + +CV_ALWAYS_INLINE void store_i16(ushort* outx, const v_int16& res) +{ + vx_store(outx, v_reinterpret_as_u16(res)); +} + +CV_ALWAYS_INLINE void store_i16(short* outx, const v_int16& res) +{ + vx_store(outx, res); +} + +CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const float* inx, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int32 a1 = v_round(vx_load(inx)); + v_int32 a2 = v_round(vx_load(&inx[nlanes/4])); + v_int32 a3 = v_round(vx_load(&inx[nlanes/2])); + v_int32 a4 = v_round(vx_load(&inx[3*nlanes/4])); + + v_int16 r1 = v_pack(a1, a2); + v_int16 r2 = v_pack(a3, a4); + + vx_store(outx, v_pack_u(r1, r2)); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const SRC* inx, uchar* outx) +{ + constexpr int nlanes = v_uint8::nlanes; + + vector_type_of_t a1 = vx_load(inx); + vector_type_of_t a2 = vx_load(&inx[nlanes/2]); + + pack_store_uchar(outx, a1, a2); +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const float* inx, DST* outx) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_int32 a1 = v_round(vx_load(inx)); + v_int32 a2 = v_round(vx_load(&inx[nlanes/2])); + + v_store_i16(outx, a1, a2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const uchar* inx, DST* outx) +{ + v_uint8 a = vx_load(inx); + v_uint16 res = v_expand_low(a); + + store_i16(outx, res); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_simd_nocoeff_impl(const SRC* inx, DST* outx) +{ + vector_type_of_t a = vx_load(inx); + store_i16(outx, a); +} + +//--------------------------------------------------------------------------------------- + +template +CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const SRC* inx, float* outx) +{ + v_float32 a = vg_load_f32(inx); + vx_store(outx, a); +} + +#define CONVERTTO_NOCOEF_SIMD(SRC, DST) \ +int convertto_simd(const SRC in[], DST out[], const int length) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= length - nlanes; x += nlanes) \ + { \ + convertto_simd_nocoeff_impl(&in[x], &out[x]); \ + } \ + if (x < length) \ + { \ + x = length - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +CONVERTTO_NOCOEF_SIMD(ushort, uchar) +CONVERTTO_NOCOEF_SIMD(short, uchar) +CONVERTTO_NOCOEF_SIMD(float, uchar) +CONVERTTO_NOCOEF_SIMD(ushort, short) +CONVERTTO_NOCOEF_SIMD(uchar, short) +CONVERTTO_NOCOEF_SIMD(float, short) +CONVERTTO_NOCOEF_SIMD(uchar, ushort) +CONVERTTO_NOCOEF_SIMD(short, ushort) +CONVERTTO_NOCOEF_SIMD(float, ushort) +CONVERTTO_NOCOEF_SIMD(uchar, float) +CONVERTTO_NOCOEF_SIMD(ushort, float) +CONVERTTO_NOCOEF_SIMD(short, float) + +#undef CONVERTTO_NOCOEF_SIMD + +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const float* inx, uchar* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_float32 a1 = vx_load(inx); + v_float32 a2 = vx_load(&inx[nlanes / 4]); + v_float32 a3 = vx_load(&inx[nlanes / 2]); + v_float32 a4 = vx_load(&inx[3 * nlanes / 4]); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(a3, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(a4, v_alpha, v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const SRC* inx, uchar* outx, const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = v_uint8::nlanes; + + v_int16 a = v_reinterpret_as_s16(vx_load(inx)); + v_int16 b = v_reinterpret_as_s16(vx_load(&inx[nlanes / 2])); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + v_float32 b1 = v_cvt_f32(v_expand_low(b)); + v_float32 b2 = v_cvt_f32(v_expand_high(b)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(b1, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(b2, v_alpha, v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const uchar* inx, uchar* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_uint8 a = vx_load(inx); + v_int16 a1 = v_reinterpret_as_s16(v_expand_low(a)); + v_int16 a2 = v_reinterpret_as_s16(v_expand_high(a)); + + v_float32 f1 = v_cvt_f32(v_expand_low(a1)); + v_float32 f2 = v_cvt_f32(v_expand_high(a1)); + + v_float32 f3 = v_cvt_f32(v_expand_low(a2)); + v_float32 f4 = v_cvt_f32(v_expand_high(a2)); + + v_int32 r1 = v_round(v_fma(f1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(f2, v_alpha, v_beta)); + v_int32 r3 = v_round(v_fma(f3, v_alpha, v_beta)); + v_int32 r4 = v_round(v_fma(f4, v_alpha, v_beta)); + + vx_store(outx, v_pack_u(v_pack(r1, r2), v_pack(r3, r4))); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const float* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + constexpr int nlanes = vector_type_of_t::nlanes; + + v_float32 a1 = vx_load(inx); + v_float32 a2 = vx_load(&inx[nlanes / 2]); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const uchar* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_int16 a = v_reinterpret_as_s16(vx_load_expand(inx)); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE +typename std::enable_if::type +convertto_scaled_simd_impl(const SRC* inx, DST* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_int16 a = v_reinterpret_as_s16(vx_load(inx)); + + v_float32 a1 = v_cvt_f32(v_expand_low(a)); + v_float32 a2 = v_cvt_f32(v_expand_high(a)); + + v_int32 r1 = v_round(v_fma(a1, v_alpha, v_beta)); + v_int32 r2 = v_round(v_fma(a2, v_alpha, v_beta)); + + v_store_i16(outx, r1, r2); +} + +template +CV_ALWAYS_INLINE void convertto_scaled_simd_impl(const SRC* inx, float* outx, + const v_float32& v_alpha, + const v_float32& v_beta) +{ + v_float32 a = vg_load_f32(inx); + vx_store(outx, v_fma(a, v_alpha, v_beta)); +} + +#define CONVERTTO_SCALED_SIMD(SRC, DST) \ +int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ + const float beta, const int length) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + v_float32 v_alpha = vx_setall_f32(alpha); \ + v_float32 v_beta = vx_setall_f32(beta); \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= length - nlanes; x += nlanes) \ + { \ + convertto_scaled_simd_impl(&in[x], &out[x], v_alpha, v_beta); \ + } \ + if (x < length) \ + { \ + x = length - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +CONVERTTO_SCALED_SIMD(uchar, uchar) +CONVERTTO_SCALED_SIMD(ushort, uchar) +CONVERTTO_SCALED_SIMD(short, uchar) +CONVERTTO_SCALED_SIMD(float, uchar) +CONVERTTO_SCALED_SIMD(short, short) +CONVERTTO_SCALED_SIMD(ushort, short) +CONVERTTO_SCALED_SIMD(uchar, short) +CONVERTTO_SCALED_SIMD(float, short) +CONVERTTO_SCALED_SIMD(ushort, ushort) +CONVERTTO_SCALED_SIMD(uchar, ushort) +CONVERTTO_SCALED_SIMD(short, ushort) +CONVERTTO_SCALED_SIMD(float, ushort) +CONVERTTO_SCALED_SIMD(uchar, float) +CONVERTTO_SCALED_SIMD(ushort, float) +CONVERTTO_SCALED_SIMD(short, float) +CONVERTTO_SCALED_SIMD(float, float) + +#undef CONVERTTO_SCALED_SIMD + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp index aca2dcca6f..bdd11b1214 100644 --- a/modules/gapi/src/backends/fluid/gfluidimgproc.cpp +++ b/modules/gapi/src/backends/fluid/gfluidimgproc.cpp @@ -25,6 +25,9 @@ #include "gfluidimgproc_func.hpp" +#if CV_AVX2 +#include "gfluidimgproc_simd_avx2.hpp" +#endif #if CV_SSE4_1 #include "gfluidcore_simd_sse41.hpp" #endif @@ -1866,10 +1869,11 @@ static inline double ratio(int inSz, int outSz) { } template -static inline void initScratchLinear(const cv::GMatDesc& in, - const Size& outSz, - cv::gapi::fluid::Buffer& scratch, - int lpi) { +CV_ALWAYS_INLINE void initScratchLinear(const cv::GMatDesc& in, + const Size& outSz, + cv::gapi::fluid::Buffer& scratch, + int lpi) +{ using alpha_type = typename Mapper::alpha_type; static const auto unity = Mapper::unity; @@ -1895,7 +1899,8 @@ static inline void initScratchLinear(const cv::GMatDesc& in, auto *clone = scr.clone; auto *index = scr.mapsx; - for (int x = 0; x < outSz.width; x++) { + for (int x = 0; x < outSz.width; ++x) + { auto map = Mapper::map(hRatio, 0, in.size.width, x); auto alpha0 = map.alpha0; auto index0 = map.index0; @@ -1930,7 +1935,7 @@ static inline void initScratchLinear(const cv::GMatDesc& in, alpha[x] = alpha0; index[x] = index0; - for (int l = 0; l < 4; l++) { + for (int l = 0; l < 4; ++l) { clone[4*x + l] = alpha0; } } @@ -1952,10 +1957,18 @@ struct MapperUnit { I index0, index1; }; -inline static uint8_t calc(short alpha0, uint8_t src0, short alpha1, uint8_t src1) { +CV_ALWAYS_INLINE uint8_t resize_calc_revert_fixedpoint(short alpha0, uint8_t src0, short alpha1, uint8_t src1) +{ constexpr static const int half = 1 << 14; return (src0 * alpha0 + src1 * alpha1 + half) >> 15; } + +CV_ALWAYS_INLINE float resize_main_calculation(float alpha0, float src0, float alpha1, float src1) +{ + return src0 * alpha0 + src1 * alpha1; +} + +namespace linear { struct Mapper { constexpr static const int ONE = 1 << 15; typedef short alpha_type; @@ -1980,11 +1993,38 @@ struct Mapper { return u; } }; +} // namespace linear + +namespace linear32f { +struct Mapper { + typedef float alpha_type; + typedef int index_type; + constexpr static const float unity = 1; + + typedef MapperUnit Unit; + + static inline Unit map(double ratio, int start, int max, int outCoord) { + float f = static_cast((outCoord + 0.5) * ratio - 0.5); + int s = cvFloor(f); + f -= s; + + Unit u; + + u.index0 = std::max(s - start, 0); + u.index1 = ((std::fabs(f) <= FLT_EPSILON) || s + 1 >= max) ? s - start : s - start + 1; + + u.alpha0 = 1.f - f; + u.alpha1 = f; + + return u; + } +}; +} // namespace linear32f template -static void calcRowLinearC(const cv::gapi::fluid::View & in, - cv::gapi::fluid::Buffer& out, - cv::gapi::fluid::Buffer& scratch) { +CV_ALWAYS_INLINE void calcRowLinearC(const cv::gapi::fluid::View & in, + cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) { using alpha_type = typename Mapper::alpha_type; auto inSz = in.meta().size; @@ -2052,14 +2092,89 @@ static void calcRowLinearC(const cv::gapi::fluid::View & in, for (int c = 0; c < numChan; c++) { auto idx0 = numChan*sx0 + c; auto idx1 = numChan*sx1 + c; - T tmp0 = calc(beta0, src0[l][idx0], beta1, src1[l][idx0]); - T tmp1 = calc(beta0, src0[l][idx1], beta1, src1[l][idx1]); - dst[l][numChan * x + c] = calc(alpha0, tmp0, alpha1, tmp1); + T tmp0 = resize_calc_revert_fixedpoint(beta0, src0[l][idx0], beta1, src1[l][idx0]); + T tmp1 = resize_calc_revert_fixedpoint(beta0, src0[l][idx1], beta1, src1[l][idx1]); + dst[l][numChan * x + c] = resize_calc_revert_fixedpoint(alpha0, tmp0, alpha1, tmp1); } } } } +template +CV_ALWAYS_INLINE void calcRowLinear(const cv::gapi::fluid::View& in, + cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) +{ + GAPI_DbgAssert((out.meta().depth == CV_32F) && (out.meta().chan == 1)); + + auto inSz = in.meta().size; + auto outSz = out.meta().size; + + auto inY = in.y(); + int length = out.length(); + int outY = out.y(); + int lpi = out.lpi(); + GAPI_DbgAssert(outY + lpi <= outSz.height); + + GAPI_DbgAssert(lpi <= 4); + + LinearScratchDesc scr(inSz.width, inSz.height, outSz.width, + outSz.height, scratch.OutLineB()); + + const auto* alpha = scr.alpha; + const auto* mapsx = scr.mapsx; + const auto* beta0 = scr.beta; + const auto* mapsy = scr.mapsy; + + const auto* beta = beta0 + outY; + const float* src0[4]; + const float* src1[4]; + float* dst[4]; + + for (int l = 0; l < lpi; ++l) + { + auto index0 = mapsy[outY + l] - inY; + auto index1 = mapsy[outSz.height + outY + l] - inY; + + src0[l] = in.InLine(index0); + src1[l] = in.InLine(index1); + dst[l] = out.OutLine(l); + } + +#if CV_AVX2 + // number floats in AVX2 SIMD vector. + constexpr int nlanes = 8; + + if (inSz.width >= nlanes && outSz.width >= nlanes) + { + avx2::calcRowLinear32FC1Impl(dst, src0, src1, alpha, mapsx, beta, + inSz, outSz, lpi); + + return; + } +#endif // CV_AVX2 + + using alpha_type = typename Mapper::alpha_type; + for (int l = 0; l < lpi; ++l) + { + constexpr static const auto unity = Mapper::unity; + + auto b0 = beta[l]; + auto b1 = saturate_cast(unity - beta[l]); + + for (int x = 0; x < length; ++x) { + auto alpha0 = alpha[x]; + auto alpha1 = saturate_cast(unity - alpha[x]); + auto sx0 = mapsx[x]; + auto sx1 = sx0 + 1; + + float tmp0 = resize_main_calculation(b0, src0[l][sx0], b1, src1[l][sx0]); + float tmp1 = resize_main_calculation(b0, src0[l][sx1], b1, src1[l][sx1]); + dst[l][x] = resize_main_calculation(alpha0, tmp0, alpha1, tmp1); + } + } +} + GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) { static const int Window = 1; @@ -2071,9 +2186,13 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) constexpr static const short ONE = INTER_RESIZE_COEF_SCALE; static void initScratch(const cv::GMatDesc& in, - cv::Size outSz, double fx, double fy, int /*interp*/, + cv::Size outSz, double fx, double fy, int interp, cv::gapi::fluid::Buffer &scratch) - { + { + GAPI_Assert((in.depth == CV_8U && in.chan == 3) || + (in.depth == CV_32F && in.chan == 1)); + GAPI_Assert(interp == cv::INTER_LINEAR); + int outSz_w; int outSz_h; if (outSz.width == 0 || outSz.height == 0) @@ -2088,32 +2207,45 @@ GAPI_FLUID_KERNEL(GFluidResize, cv::gapi::imgproc::GResize, true) } cv::Size outSize(outSz_w, outSz_h); - if (in.chan == 3) + if (in.depth == CV_8U && in.chan == 3) { - initScratchLinear(in, outSize, scratch, LPI); + initScratchLinear(in, outSize, scratch, LPI); } - else if (in.chan == 4) + else if (in.depth == CV_32F && in.chan == 1) { - initScratchLinear(in, outSize, scratch, LPI); + initScratchLinear(in, outSize, scratch, LPI); } - } + else + { + CV_Error(cv::Error::StsBadArg, "unsupported combination of type and number of channel"); + } + } static void resetScratch(cv::gapi::fluid::Buffer& /*scratch*/) {} - static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, double /*fy*/, int interp, - cv::gapi::fluid::Buffer& out, - cv::gapi::fluid::Buffer& scratch) { - const int channels = in.meta().chan; - GAPI_Assert((channels == 3 || channels == 4) && (interp == cv::INTER_LINEAR)); + static void run(const cv::gapi::fluid::View& in, cv::Size /*sz*/, double /*fx*/, + double /*fy*/, int interp, cv::gapi::fluid::Buffer& out, + cv::gapi::fluid::Buffer& scratch) + { + GAPI_Assert((in.meta().depth == CV_8U && in.meta().chan == 3) || + (in.meta().depth == CV_32F && in.meta().chan == 1)); + GAPI_Assert(interp == cv::INTER_LINEAR); - if (channels == 3) + const int channels = in.meta().chan; + const int depth = in.meta().depth; + + if (depth == CV_8U && channels == 3) { - calcRowLinearC(in, out, scratch); + calcRowLinearC(in, out, scratch); } - else if (channels == 4) + else if (depth == CV_32F && channels == 1) { - calcRowLinearC(in, out, scratch); + calcRowLinear(in, out, scratch); + } + else + { + CV_Error(cv::Error::StsBadArg, "unsupported combination of type and number of channel"); } } }; diff --git a/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp new file mode 100644 index 0000000000..e246f0613b --- /dev/null +++ b/modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp @@ -0,0 +1,181 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#if !defined(GAPI_STANDALONE) + +#include "opencv2/gapi/own/saturate.hpp" + +#include + +#include "opencv2/core.hpp" + +#include + +#include +#include + +#include +#include +#include + +namespace cv { +namespace gapi { +namespace fluid { +namespace avx2 { + +CV_ALWAYS_INLINE void v_gather_pairs(const float src[], const int* mapsx, + v_float32x8& low, v_float32x8& high) +{ + low.val = _mm256_castsi256_ps(_mm256_setr_epi64x(*reinterpret_cast(&src[mapsx[0]]), + *reinterpret_cast(&src[mapsx[1]]), + *reinterpret_cast(&src[mapsx[2]]), + *reinterpret_cast(&src[mapsx[3]]))); + high.val = _mm256_castsi256_ps(_mm256_setr_epi64x(*reinterpret_cast(&src[mapsx[4]]), + *reinterpret_cast(&src[mapsx[5]]), + *reinterpret_cast(&src[mapsx[6]]), + *reinterpret_cast(&src[mapsx[7]]))); +} + +CV_ALWAYS_INLINE void v_deinterleave(const v_float32x8& low, const v_float32x8& high, + v_float32x8& even, v_float32x8& odd) +{ + __m256 tmp0 = _mm256_unpacklo_ps(low.val, high.val); + __m256 tmp1 = _mm256_unpackhi_ps(low.val, high.val); + __m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); + __m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); + even.val = _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(tmp2), 216 /*11011000*/)); + odd.val = _mm256_castsi256_ps(_mm256_permute4x64_epi64(_mm256_castps_si256(tmp3), 216 /*11011000*/)); +} + +// Resize (bi-linear, 32FC1) +CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[], + const float *src0[], + const float *src1[], + const float alpha[], + const int mapsx[], + const float beta[], + const Size& inSz, + const Size& outSz, + const int lpi) +{ + bool xRatioEq1 = inSz.width == outSz.width; + bool yRatioEq1 = inSz.height == outSz.height; + + constexpr int nlanes = v_float32x8::nlanes; + + if (!xRatioEq1 && !yRatioEq1) + { + for (int line = 0; line < lpi; ++line) { + float beta0 = beta[line]; + float beta1 = 1 - beta0; + v_float32x8 v_beta0 = v256_setall_f32(beta0); + int x = 0; + + v_float32x8 low1, high1, s00, s01; + v_float32x8 low2, high2, s10, s11; + for (; x <= outSz.width - nlanes; x += nlanes) + { + v_float32x8 alpha0 = v256_load(&alpha[x]); + // v_float32 alpha1 = 1.f - alpha0; + + v_gather_pairs(src0[line], &mapsx[x], low1, high1); + v_deinterleave(low1, high1, s00, s01); + + // v_float32 res0 = s00*alpha0 + s01*alpha1; + v_float32x8 res0 = v_fma(s00 - s01, alpha0, s01); + + v_gather_pairs(src1[line], &mapsx[x], low2, high2); + v_deinterleave(low2, high2, s10, s11); + + // v_float32 res1 = s10*alpha0 + s11*alpha1; + v_float32x8 res1 = v_fma(s10 - s11, alpha0, s11); + // v_float32 d = res0*beta0 + res1*beta1; + v_float32x8 d = v_fma(res0 - res1, v_beta0, res1); + + v_store(&dst[line][x], d); + } + + for (; x < outSz.width; ++x) + { + float alpha0 = alpha[x]; + float alpha1 = 1 - alpha0; + int sx0 = mapsx[x]; + int sx1 = sx0 + 1; + float res0 = src0[line][sx0] * alpha0 + src0[line][sx1] * alpha1; + float res1 = src1[line][sx0] * alpha0 + src1[line][sx1] * alpha1; + dst[line][x] = beta0 * res0 + beta1 * res1; + } + } + } + else if (!xRatioEq1) + { + + for (int line = 0; line < lpi; ++line) { + int x = 0; + + v_float32x8 low, high, s00, s01; + for (; x <= outSz.width - nlanes; x += nlanes) + { + v_float32x8 alpha0 = v256_load(&alpha[x]); + // v_float32 alpha1 = 1.f - alpha0; + + v_gather_pairs(src0[line], &mapsx[x], low, high); + v_deinterleave(low, high, s00, s01); + + // v_float32 d = s00*alpha0 + s01*alpha1; + v_float32x8 d = v_fma(s00 - s01, alpha0, s01); + + v_store(&dst[line][x], d); + } + + for (; x < outSz.width; ++x) { + float alpha0 = alpha[x]; + float alpha1 = 1 - alpha0; + int sx0 = mapsx[x]; + int sx1 = sx0 + 1; + dst[line][x] = src0[line][sx0] * alpha0 + src0[line][sx1] * alpha1; + } + } + + } + else if (!yRatioEq1) + { + int length = inSz.width; // == outSz.width + + for (int line = 0; line < lpi; ++line) { + float beta0 = beta[line]; + float beta1 = 1 - beta0; + v_float32x8 v_beta0 = v256_setall_f32(beta0); + int x = 0; + + for (; x <= length - nlanes; x += nlanes) + { + v_float32x8 s0 = v256_load(&src0[line][x]); + v_float32x8 s1 = v256_load(&src1[line][x]); + + // v_float32 d = s0*beta0 + s1*beta1; + v_float32x8 d = v_fma(s0 - s1, v_beta0, s1); + + v_store(&dst[line][x], d); + } + + for (; x < length; ++x) { + dst[line][x] = beta0 * src0[line][x] + beta1 * src1[line][x]; + } + } + + } + else + { + int length = inSz.width; // == outSz.width + memcpy(dst[0], src0[0], length * sizeof(float)*lpi); + } +} +} // namespace avx2 +} // namespace fliud +} // namespace gapi +} // namespace cv +#endif // !defined(GAPI_STANDALONE) diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index b155ff0aea..6647e484b1 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -64,6 +64,9 @@ template using QueueClass = cv::gapi::own::concurrent_bounded_queue< #include "utils/itt.hpp" +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + namespace IE = InferenceEngine; namespace { @@ -261,12 +264,36 @@ struct IEUnit { InferenceEngine::RemoteContext::Ptr rctx = nullptr; + std::shared_ptr preproc_engine_impl; + // FIXME: Unlike loadNetwork case, importNetwork requires that preprocessing // should be passed as ExecutableNetwork::SetBlob method, so need to collect // and store this information at the graph compilation stage (outMeta) and use in runtime. using PreProcMap = std::unordered_map; PreProcMap preproc_map; + // NEW FIXME: Need to aggregate getInputInfo & GetInputInfo from network + // into generic wrapper and invoke it at once in single place instead of + // analyzing ParamDesc::Kind::Load/Import every time when we need to get access + // for network info. + // In term of introducing custom VPP/VPL preprocessing functionality + // It was decided to use GFrameDesc as such aggregated network info with limitation + // that VPP/VPL produces cv::MediaFrame only. But it should be not considered as + // final solution + class InputFramesDesc { + using input_name_type = std::string; + using description_type = cv::GFrameDesc; + std::map map; + public: + static bool is_applicable(const cv::GMetaArg &mm); + const description_type &get_param(const input_name_type &input) const; + + void set_param(const input_name_type &input, + const IE::TensorDesc& desc); + }; + + InputFramesDesc net_input_params; + explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { InferenceEngine::ParamMap* ctx_params = @@ -336,6 +363,17 @@ struct IEUnit { } else { cv::util::throw_error(std::logic_error("Unsupported ParamDesc::Kind")); } + + using namespace cv::gapi::wip::onevpl; + if (params.vpl_preproc_device.has_value() && params.vpl_preproc_ctx.has_value()) { + using namespace cv::gapi::wip; + GAPI_LOG_INFO(nullptr, "VPP preproc creation requested"); + preproc_engine_impl = + IPreprocEngine::create_preproc_engine( + params.vpl_preproc_device.value(), + params.vpl_preproc_ctx.value()); + GAPI_LOG_INFO(nullptr, "VPP preproc created successfuly"); + } } // This method is [supposed to be] called at Island compilation stage @@ -354,6 +392,39 @@ struct IEUnit { } }; +bool IEUnit::InputFramesDesc::is_applicable(const cv::GMetaArg &mm) { + return cv::util::holds_alternative(mm); +} + +const IEUnit::InputFramesDesc::description_type & +IEUnit::InputFramesDesc::get_param(const input_name_type &input) const { + auto it = map.find(input); + GAPI_Assert(it != map.end() && "No appropriate input is found in InputFramesDesc"); + return it->second; +} + +void IEUnit::InputFramesDesc::set_param(const input_name_type &input, + const IE::TensorDesc& desc) { + description_type ret; + ret.fmt = cv::MediaFormat::NV12; + const InferenceEngine::SizeVector& inDims = desc.getDims(); + auto layout = desc.getLayout(); + GAPI_LOG_DEBUG(nullptr, "network input: " << input << + ", tensor dims: " << inDims[0] << ", " << inDims[1] << + ", " << inDims[2] << ", " << inDims[3]); + if (layout != InferenceEngine::NHWC && layout != InferenceEngine::NCHW) { + GAPI_LOG_WARNING(nullptr, "Unsupported layout for VPP preproc: " << layout << + ", input name: " << input); + GAPI_Assert(false && "Unsupported layout for VPP preproc"); + } + GAPI_Assert(inDims.size() == 4u); + ret.size.width = static_cast(inDims[3]); + ret.size.height = static_cast(inDims[2]); + + auto res = map.emplace(input, ret); + GAPI_Assert(res.second && "Duplicated input info in InputFramesDesc are not allowable"); +} + class IECallContext { public: @@ -389,10 +460,16 @@ public: const IEUnit &uu; cv::gimpl::GIslandExecutable::IOutput &out; - // NB: Need to gurantee that MediaFrame::View don't die until request is over. + // NB: Need to guarantee that MediaFrame::View doesn't die until request is over. using Views = std::vector>; Views views; + // To store exception appeared in callback. + std::exception_ptr eptr; + + using req_key_t = void*; + cv::MediaFrame* prepareKeepAliveFrameSlot(req_key_t key); + size_t releaseKeepAliveFrame(req_key_t key); private: cv::detail::VectorRef& outVecRef(std::size_t idx); @@ -414,6 +491,10 @@ private: // Input parameters passed to an inference operation. cv::GArgs m_args; cv::GShapes m_in_shapes; + + // keep alive preprocessed frames + std::mutex keep_alive_frames_mutex; + std::unordered_map keep_alive_pp_frames; }; IECallContext::IECallContext(const IEUnit & unit, @@ -513,6 +594,36 @@ cv::GArg IECallContext::packArg(const cv::GArg &arg) { } } +cv::MediaFrame* IECallContext::prepareKeepAliveFrameSlot(req_key_t key) { + std::lock_guard lock(keep_alive_frames_mutex); + return &keep_alive_pp_frames[key]; +} + +size_t IECallContext::releaseKeepAliveFrame(req_key_t key) { + size_t elapsed_count = 0; + void *prev_slot = nullptr; + // NB: release MediaFrame previously captured by prepareKeepAliveFrameSlot + // We must capture it to keep a reference counter on inner media adapter + // to ensure that frame resource would be locked until inference done. + // Otherwise decoder could seized this frame resource as free/unlocked resource + // from resource pool + // Current function just take a unique frame `key` and overwrite stored + // actual frame by empty frame + { + std::lock_guard lock(keep_alive_frames_mutex); + auto ka_frame_it = keep_alive_pp_frames.find(key); + if (ka_frame_it != keep_alive_pp_frames.end()) { + prev_slot = &ka_frame_it->second; + ka_frame_it->second = cv::MediaFrame(); + } + elapsed_count = keep_alive_pp_frames.size(); + } + cv::util::suppress_unused_warning(prev_slot); + GAPI_LOG_DEBUG(nullptr, "Release keep alive frame, slot: " << prev_slot << + ", reserved frames count: " << elapsed_count); + return elapsed_count; +} + struct IECallable { static const char *name() { return "IERequestCallable"; } using Run = std::function, cv::gimpl::ie::RequestPool&)>; @@ -549,11 +660,65 @@ using GConstGIEModel = ade::ConstTypedGraph , IECallable >; -inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { +cv::MediaFrame preprocess_frame_impl(cv::MediaFrame &&in_frame, const std::string &layer_name, + IECallContext& ctx, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame, + bool* out_is_preprocessed) { + cv::util::optional param = + ctx.uu.preproc_engine_impl->is_applicable(in_frame); + if (param.has_value()) { + GAPI_LOG_DEBUG(nullptr, "VPP preprocessing for decoded remote frame will be used"); + cv::GFrameDesc expected_net_input_descr = + ctx.uu.net_input_params.get_param(layer_name); + + // TODO: Find a better place to configure media format for GPU + // adjust color conversion to NV12 according to OV GPU limitation + if(ctx.uu.params.device_id.find("GPU") != std::string::npos && + ctx.uu.rctx) { + auto it = ctx.uu.params.config.find(std::string("GPU_NV12_TWO_INPUTS")); + if (it != ctx.uu.params.config.end()) { + if (it->second == "YES") { + GAPI_LOG_DEBUG(nullptr, "Adjust preprocessing GPU media format to NV12"); + expected_net_input_descr.fmt = cv::MediaFormat::NV12; + } + } + } + + cv::gapi::wip::pp_session pp_sess = + ctx.uu.preproc_engine_impl->initialize_preproc(param.value(), + expected_net_input_descr); + + in_frame = ctx.uu.preproc_engine_impl->run_sync(pp_sess, in_frame, opt_roi); + + if (out_keep_alive_frame != nullptr) { + GAPI_LOG_DEBUG(nullptr, "remember preprocessed remote frame to keep it busy from reuse, slot: " << + out_keep_alive_frame); + *out_keep_alive_frame = in_frame; + } + if (out_is_preprocessed) { + *out_is_preprocessed = true; + } + } // otherwise it is not suitable frame, then check on other preproc backend or rely on IE plugin + return std::move(in_frame); +} + +inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i, + const std::string &layer_name, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame, + bool* out_is_preprocessed) { GAPI_Assert(ctx.inShape(i) == cv::GShape::GFRAME && "Remote blob is supported for MediaFrame only"); + cv::MediaFrame frame = ctx.inFrame(i); + if (ctx.uu.preproc_engine_impl) { + GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded remote frame in remote ctx"); + frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, + out_keep_alive_frame, out_is_preprocessed); + } - cv::util::any any_blob_params = ctx.inFrame(i).blobParams(); + // Request params for result frame whatever it got preprocessed or not + cv::util::any any_blob_params = frame.blobParams(); using ParamType = std::pair; using NV12ParamType = std::pair; @@ -579,14 +744,24 @@ inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i, - cv::gapi::ie::TraitAs hint) { + cv::gapi::ie::TraitAs hint, + const std::string& layer_name, + const cv::util::optional &opt_roi, + cv::MediaFrame* out_keep_alive_frame = nullptr, + bool* out_is_preprocessed = nullptr) { if (ctx.uu.rctx != nullptr) { - return extractRemoteBlob(ctx, i); + return extractRemoteBlob(ctx, i, layer_name, opt_roi, + out_keep_alive_frame, out_is_preprocessed); } switch (ctx.inShape(i)) { case cv::GShape::GFRAME: { - const auto& frame = ctx.inFrame(i); + auto frame = ctx.inFrame(i); + if (ctx.uu.preproc_engine_impl) { + GAPI_LOG_DEBUG(nullptr, "Try to use preprocessing for decoded frame in local ctx"); + frame = preprocess_frame_impl(std::move(frame), layer_name, ctx, opt_roi, + out_keep_alive_frame, out_is_preprocessed); + } ctx.views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R))); return wrapIE(*(ctx.views.back()), frame.desc()); } @@ -619,12 +794,24 @@ static void setBlob(InferenceEngine::InferRequest& req, static void setROIBlob(InferenceEngine::InferRequest& req, const std::string& layer_name, const IE::Blob::Ptr& blob, - const cv::Rect &roi, + const cv::Rect &roi, const IECallContext& ctx) { - if (ctx.uu.params.device_id.find("GPU") != std::string::npos) { - GAPI_LOG_DEBUG(nullptr, "Skip ROI blob creation for device_id: " << - ctx.uu.params.device_id << ", layer: " << layer_name); - setBlob(req, layer_name, blob, ctx); + if (ctx.uu.params.device_id.find("GPU") != std::string::npos && + ctx.uu.rctx) { + try { + // NB: make_shared_blob() cannot work with GPU NV12 & ROI at the moment. + // OpenVINO produces exception with unsupported status. + // To do not encounter with silent crash situation we should catch OV exception + // and suggest to avoid this problem by using inner preprocessing feature. + // VPP/VPL proprocessing are supported at the moment + setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); + } catch (const std::exception &ex) { + GAPI_LOG_WARNING(nullptr, "cannot set ROI blob for layer: " << layer_name << + ", reason:\n" << ex.what() << + "\nTry using self GAPI preprocessing feature: " + " Check method `cfgPreprocessingParams` in `cv::gapi::ie::Params`"); + throw; + } } else { setBlob(req, layer_name, IE::make_shared_blob(blob, toIE(roi)), ctx); } @@ -654,7 +841,7 @@ std::vector cv::gimpl::ie::IECompiled::createInfe class cv::gimpl::ie::RequestPool { public: using RunF = std::function; - using CallbackF = std::function; + using CallbackF = std::function; // NB: The task is represented by: // RunF - function which is set blobs and run async inference. @@ -673,7 +860,7 @@ private: void callback(Task task, size_t id, IE::InferRequest request, - IE::StatusCode code); + IE::StatusCode code) noexcept; void setup(); QueueClass m_idle_ids; @@ -704,32 +891,28 @@ void cv::gimpl::ie::RequestPool::execute(cv::gimpl::ie::RequestPool::Task&& t) { static_cast( std::bind(&cv::gimpl::ie::RequestPool::callback, this, t, id, _1, _2))); - t.run(request); + // NB: InferRequest is already marked as busy + // in case of exception need to return it back to the idle. + try { + t.run(request); + } catch (...) { + request.SetCompletionCallback([](){}); + m_idle_ids.push(id); + throw; + } } void cv::gimpl::ie::RequestPool::callback(cv::gimpl::ie::RequestPool::Task task, size_t id, IE::InferRequest request, - IE::StatusCode code) { - // FIXME: Any exception which is arrised here must not leave this callback, - // because it won't be handled. - try { - if (code != IE::StatusCode::OK) { - throw std::logic_error("IE::InferRequest finished with not OK status"); - } - task.callback(request); - // NB: IE::InferRequest keeps the callback until the new one is set. - // Since user's callback might keep resources that should be released, - // need to destroy its after execution. - // Let's set the empty one to cause the destruction of a callback. - request.SetCompletionCallback([](){}); - m_idle_ids.push(id); - } catch (const std::exception& e) { - GAPI_LOG_FATAL(NULL, "Callback failed with error: " << e.what()); - //FIXME: Exception CAN't be rethrown here, since this callback works - // in separate IE thread and such scenarios aren't handled properly in - // G-API so far. - } + IE::StatusCode code) noexcept { + // NB: Inference is over. + // 1. Run callback + // 2. Destroy callback to free resources. + // 3. Mark InferRequest as idle. + task.callback(request, code); + request.SetCompletionCallback([](){}); + m_idle_ids.push(id); } // NB: Not thread-safe. @@ -780,22 +963,23 @@ cv::gimpl::ie::GIEExecutable::GIEExecutable(const ade::Graph &g, void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in, cv::gimpl::GIslandExecutable::IOutput &out) { - // General alghoritm: + // General algorithm: // 1. Collect island inputs/outputs. // 2. Create kernel context. (Every kernel has his own context). // 3. If the EndOfStream message is recieved, wait until all passed task are done. - // 4. + // 4. If the Exception message is revieved, propagate it further. + // 5. // 5.1 Run the kernel. // 5.2 Kernel wait for all nececcary infer requests and start asynchronous execution. // 5.3 After the kernel is finished continue processing next frame. // - // 5. If graph is compiled in non-streaming mode, wait until all tasks are done. + // 6. If graph is compiled in non-streaming mode, wait until all tasks are done. std::vector input_objs; std::vector output_objs; - const auto &in_desc = in.desc(); - const auto in_msg = in.get(); + const auto &in_desc = in.desc(); + auto in_msg = in.get(); if (cv::util::holds_alternative(in_msg)) { @@ -833,10 +1017,20 @@ void cv::gimpl::ie::GIEExecutable::run(cv::gimpl::GIslandExecutable::IInput &in const auto &kk = giem.metadata(this_nh).get(); - // (4) Run the kernel. - kk.run(ctx, *m_reqPool); + // (5) Run the kernel. + try { + kk.run(ctx, *m_reqPool); + } catch (...) { + auto eptr = std::current_exception(); + for (auto i : ade::util::iota(ctx->uu.params.num_out)) + { + auto output = ctx->output(i); + ctx->out.post(std::move(output), eptr); + } + return; + } - // (5) In non-streaming mode need to wait until the all tasks are done + // (6) In non-streaming mode need to wait until the all tasks are done // FIXME: Is there more graceful way to handle this case ? if (!m_gm.metadata().contains()) { m_reqPool->waitAll(); @@ -942,20 +1136,29 @@ static IE::PreProcessInfo configurePreProcInfo(const IE::InputInfo::CPtr& ii, // NB: This is a callback used by async infer // to post outputs blobs (cv::GMat's). -static void PostOutputs(InferenceEngine::InferRequest &request, - std::shared_ptr ctx) { +static void PostOutputs(InferenceEngine::InferRequest &request, + InferenceEngine::StatusCode code, + std::shared_ptr ctx) { GAPI_ITT_STATIC_LOCAL_HANDLE(ie_cb_post_outputs_hndl, "IE_async_callback_PostOutputs"); GAPI_ITT_AUTO_TRACE_GUARD(ie_cb_post_outputs_hndl); - for (auto i : ade::util::iota(ctx->uu.params.num_out)) - { + if (code != IE::StatusCode::OK) { + std::stringstream ss; + ss << "InferRequest for model: " << ctx->uu.params.model_path + << " finished with InferenceEngine::StatusCode: " << static_cast(code); + ctx->eptr = std::make_exception_ptr(std::logic_error(ss.str())); + } + + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { auto& out_mat = ctx->outMatR(i); IE::Blob::Ptr this_blob = request.GetBlob(ctx->uu.params.output_names[i]); copyFromIE(this_blob, out_mat); auto output = ctx->output(i); ctx->out.meta(output, ctx->input(0).meta); - ctx->out.post(std::move(output)); + ctx->out.post(std::move(output), ctx->eptr); } + + ctx->releaseKeepAliveFrame(&request); } class PostOutputsList { @@ -964,7 +1167,9 @@ public: std::shared_ptr ctx, std::vector>&& cached_dims); - void operator()(InferenceEngine::InferRequest &request, size_t pos) const; + void operator()(InferenceEngine::InferRequest &request, + InferenceEngine::StatusCode code, + size_t pos) const; private: struct Priv { @@ -985,20 +1190,30 @@ PostOutputsList::PostOutputsList(size_t size, m_priv->cached_dims = std::move(cached_dims); } -void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) const { +void PostOutputsList::operator()(InferenceEngine::InferRequest &req, + InferenceEngine::StatusCode code, + size_t pos) const { auto&& ctx = m_priv->ctx; auto&& cached_dims = m_priv->cached_dims; auto&& finished = m_priv->finished; auto&& size = m_priv->size; - for (auto i : ade::util::iota(ctx->uu.params.num_out)) { - std::vector &out_vec = ctx->outVecR(i); - IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]); - GAPI_Assert(out_blob); + if (code != IE::StatusCode::OK) { + ctx->eptr = std::make_exception_ptr( + std::logic_error("IE::InferRequest finished with not OK status")); + } - // FIXME: Avoid data copy. Not sure if it is possible though - out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); - copyFromIE(out_blob, out_vec[pos]); + if (!ctx->eptr) { + for (auto i : ade::util::iota(ctx->uu.params.num_out)) { + std::vector &out_vec = ctx->outVecR(i); + + IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]); + GAPI_Assert(out_blob); + + // FIXME: Avoid data copy. Not sure if it is possible though + out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision())); + copyFromIE(out_blob, out_vec[pos]); + } } ++finished; @@ -1006,7 +1221,7 @@ void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) for (auto i : ade::util::iota(ctx->uu.params.num_out)) { auto output = ctx->output(i); ctx->out.meta(output, ctx->input(0).meta); - ctx->out.post(std::move(output)); + ctx->out.post(std::move(output), ctx->eptr); } } } @@ -1057,6 +1272,12 @@ struct Infer: public cv::detail::KernelTag { if (isApplicableForResize(ii->getTensorDesc())) { ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); } + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } // FIXME: This isn't the best place to call reshape function. @@ -1076,6 +1297,12 @@ struct Infer: public cv::detail::KernelTag { auto ii = inputs.at(input_name); const auto & mm = std::get<1>(it); non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } } @@ -1114,14 +1341,16 @@ struct Infer: public cv::detail::KernelTag { (layout == IE::Layout::NCHW || layout == IE::Layout::NHWC) ? cv::gapi::ie::TraitAs::IMAGE : cv::gapi::ie::TraitAs::TENSOR; - IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint); + IE::Blob::Ptr this_blob = extractBlob(*ctx, i, hint, + layer_name, + cv::util::optional{}); setBlob(req, layer_name, this_blob, *ctx); } // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? req.StartAsync(); }, - std::bind(PostOutputs, _1, ctx) + std::bind(PostOutputs, _1, _2, ctx) } ); } @@ -1169,6 +1398,12 @@ struct InferROI: public cv::detail::KernelTag { if (!input_reshape_table.empty()) { const_cast(&uu.net)->reshape(input_reshape_table); } + + // NB: configure input param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } else { GAPI_Assert(uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); auto inputs = uu.this_network.GetInputsInfo(); @@ -1176,6 +1411,12 @@ struct InferROI: public cv::detail::KernelTag { auto* non_const_prepm = const_cast(&uu.preproc_map); auto ii = inputs.at(input_name); non_const_prepm->emplace(input_name, configurePreProcInfo(ii, mm)); + + // NB: configure intput param for further preproc + if (uu.net_input_params.is_applicable(mm)) { + const_cast(uu.net_input_params) + .set_param(input_name, ii->getTensorDesc()); + } } // FIXME: It would be nice here to have an exact number of network's @@ -1205,18 +1446,31 @@ struct InferROI: public cv::detail::KernelTag { GAPI_Assert(ctx->uu.params.num_in == 1); auto&& this_roi = ctx->inArg(0).rref(); + // reserve unique slot for keep alive preprocessed frame + cv::MediaFrame* slot_ptr = ctx->prepareKeepAliveFrameSlot(&req); + // NB: This blob will be used to make roi from its, so // it should be treated as image + bool preprocessed = false; IE::Blob::Ptr this_blob = - extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE); - setROIBlob(req, + extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + *(ctx->uu.params.input_names.begin()), + cv::util::make_optional(this_roi), + slot_ptr, &preprocessed); + if (!preprocessed) { + setROIBlob(req, *(ctx->uu.params.input_names.begin()), this_blob, this_roi, *ctx); + } else { + setBlob(req, + *(ctx->uu.params.input_names.begin()), + this_blob, *ctx); + } // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? req.StartAsync(); }, - std::bind(PostOutputs, _1, ctx) + std::bind(PostOutputs, _1, _2, ctx) } ); } @@ -1292,7 +1546,6 @@ struct InferList: public cv::detail::KernelTag { static void run(std::shared_ptr ctx, cv::gimpl::ie::RequestPool &reqPool) { - const auto& in_roi_vec = ctx->inArg(0u).rref(); // NB: In case there is no input data need to post output anyway if (in_roi_vec.empty()) { @@ -1306,7 +1559,9 @@ struct InferList: public cv::detail::KernelTag { // NB: This blob will be used to make roi from its, so // it should be treated as image - IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE); + IE::Blob::Ptr this_blob = extractBlob(*ctx, 1, cv::gapi::ie::TraitAs::IMAGE, + ctx->uu.params.input_names[0u], + cv::util::optional{}); std::vector> cached_dims(ctx->uu.params.num_out); for (auto i : ade::util::iota(ctx->uu.params.num_out)) { @@ -1330,11 +1585,10 @@ struct InferList: public cv::detail::KernelTag { reqPool.execute( cv::gimpl::ie::RequestPool::Task { [ctx, rc, this_blob](InferenceEngine::InferRequest &req) { - IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc)); - setBlob(req, ctx->uu.params.input_names[0u], roi_blob, *ctx); + setROIBlob(req, ctx->uu.params.input_names[0u], this_blob, rc, *ctx); req.StartAsync(); }, - std::bind(callback, std::placeholders::_1, pos) + std::bind(callback, std::placeholders::_1, std::placeholders::_2, pos) } ); } @@ -1454,7 +1708,9 @@ struct InferList2: public cv::detail::KernelTag { && "This operation must have at least two arguments"); // NB: This blob will be used to make roi from its, so // it should be treated as image - IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE); + IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0, cv::gapi::ie::TraitAs::IMAGE, + ctx->uu.params.input_names[0u], + cv::util::optional{}); const auto list_size = ctx->inArg(1u).size(); if (list_size == 0u) { for (auto i : ade::util::iota(ctx->uu.params.num_out)) { @@ -1488,23 +1744,24 @@ struct InferList2: public cv::detail::KernelTag { for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) { const auto &this_vec = ctx->inArg(in_idx+1u); GAPI_Assert(this_vec.size() == list_size); - IE::Blob::Ptr this_blob; if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) { const auto &vec = this_vec.rref(); - this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx])); + setROIBlob(req, ctx->uu.params.input_names[in_idx], + blob_0, vec[list_idx], *ctx); } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) { const auto &vec = this_vec.rref(); const auto &mat = vec[list_idx]; - this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR); + setBlob(req, ctx->uu.params.input_names[in_idx], + wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR), + *ctx); } else { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - setBlob(req, ctx->uu.params.input_names[in_idx], this_blob, *ctx); } req.StartAsync(); }, - std::bind(callback, std::placeholders::_1, list_idx) + std::bind(callback, std::placeholders::_1, std::placeholders::_2, list_idx) } // task ); } // for diff --git a/modules/gapi/src/backends/oak/goak.cpp b/modules/gapi/src/backends/oak/goak.cpp index 6d9044aefa..022f8f596f 100644 --- a/modules/gapi/src/backends/oak/goak.cpp +++ b/modules/gapi/src/backends/oak/goak.cpp @@ -7,7 +7,7 @@ #include #include -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" #include #include @@ -24,6 +24,10 @@ GFrame sobelXY(const GFrame& in, const cv::Mat& hk, const cv::Mat& vk) { return GSobelXY::on(in, hk, vk); } +GFrame copy(const GFrame& in) { + return GCopy::on(in); +} + // This is a dummy oak::ColorCamera class that just makes our pipelining // machinery work. The real data comes from the physical camera which // is handled by DepthAI library. @@ -31,6 +35,11 @@ ColorCamera::ColorCamera() : m_dummy(cv::MediaFrame::Create()) { } +ColorCamera::ColorCamera(const ColorCameraParams& params) + : m_dummy(cv::MediaFrame::Create()), + m_params(params) { +} + bool ColorCamera::pull(cv::gapi::wip::Data &data) { // FIXME: Avoid passing this formal frame to the pipeline std::this_thread::sleep_for(std::chrono::milliseconds(10)); @@ -39,7 +48,9 @@ bool ColorCamera::pull(cv::gapi::wip::Data &data) { } cv::GMetaArg ColorCamera::descr_of() const { - return cv::GMetaArg{cv::descr_of(m_dummy)}; + // FIXME: support other resolutions + GAPI_Assert(m_params.resolution == ColorCameraParams::Resolution::THE_1080_P); + return cv::GMetaArg{cv::GFrameDesc{cv::MediaFormat::NV12, cv::Size{1920, 1080}}}; } } // namespace oak diff --git a/modules/gapi/src/backends/oak/goak_media_adapter.cpp b/modules/gapi/src/backends/oak/goak_memory_adapters.cpp similarity index 54% rename from modules/gapi/src/backends/oak/goak_media_adapter.cpp rename to modules/gapi/src/backends/oak/goak_memory_adapters.cpp index c8e6bbb59b..5805f7933f 100644 --- a/modules/gapi/src/backends/oak/goak_media_adapter.cpp +++ b/modules/gapi/src/backends/oak/goak_memory_adapters.cpp @@ -4,17 +4,15 @@ // // Copyright (C) 2021 Intel Corporation -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" namespace cv { namespace gapi { namespace oak { -OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer) { +OAKMediaAdapter::OAKMediaAdapter(cv::Size sz, cv::MediaFormat fmt, std::vector&& buffer) +: m_sz(sz), m_fmt(fmt), m_buffer(buffer) { GAPI_Assert(fmt == cv::MediaFormat::NV12 && "OAKMediaAdapter only supports NV12 format for now"); - m_sz = sz; - m_fmt = fmt; - m_buffer = buffer; } MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) { @@ -27,6 +25,30 @@ MediaFrame::View OAKMediaAdapter::OAKMediaAdapter::access(MediaFrame::Access) { cv::GFrameDesc OAKMediaAdapter::OAKMediaAdapter::meta() const { return {m_fmt, m_sz}; } +OAKRMatAdapter::OAKRMatAdapter(const cv::Size& size, + int precision, + std::vector&& buffer) + : m_size(size), m_precision(precision), m_buffer(buffer) { + GAPI_Assert(m_precision == CV_16F); + + std::vector wrapped_dims{1, 1, m_size.width, m_size.height}; + + // FIXME: check layout and add strides + m_desc = cv::GMatDesc(m_precision, wrapped_dims); + m_mat = cv::Mat(static_cast(wrapped_dims.size()), + wrapped_dims.data(), + CV_16FC1, // FIXME: cover other precisions + m_buffer.data()); +} + +cv::GMatDesc OAKRMatAdapter::desc() const { + return m_desc; +} + +cv::RMat::View OAKRMatAdapter::access(cv::RMat::Access) { + return cv::RMat::View{m_desc, m_mat.data}; +} + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/oak/goakbackend.cpp b/modules/gapi/src/backends/oak/goakbackend.cpp index dc0daaead3..e159160ba9 100644 --- a/modules/gapi/src/backends/oak/goakbackend.cpp +++ b/modules/gapi/src/backends/oak/goakbackend.cpp @@ -2,9 +2,10 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2021 Intel Corporation +// Copyright (C) 2021-2022 Intel Corporation #include // GKernelPackage +#include // kernels() #ifdef HAVE_OAK @@ -18,21 +19,24 @@ #include #include +#include // GInferBase #include // streaming::meta_tag #include "depthai/depthai.hpp" -#include -#include "oak_media_adapter.hpp" +#include "oak_memory_adapters.hpp" + +#include // infer params namespace cv { namespace gimpl { // Forward declaration class GOAKContext; -struct OAKNodeInfo; +class OAKKernelParams; class GOAKExecutable final: public GIslandExecutable { friend class GOAKContext; + friend class OAKKernelParams; virtual void run(std::vector&&, std::vector&&) override { GAPI_Assert(false && "Not implemented"); @@ -41,7 +45,8 @@ class GOAKExecutable final: public GIslandExecutable { virtual void run(GIslandExecutable::IInput &in, GIslandExecutable::IOutput &out) override; - void LinkToParents(ade::NodeHandle handle); + void linkToParent(ade::NodeHandle handle); + void linkCopy(ade::NodeHandle handle); class ExtractTypeHelper : protected dai::Node { public: @@ -61,6 +66,7 @@ class GOAKExecutable final: public GIslandExecutable { std::shared_ptr xlink_output; std::shared_ptr out_queue; std::string out_queue_name; + size_t gapi_out_data_index; }; cv::GArg packInArg(const GArg &arg, std::vector& oak_ins); @@ -79,11 +85,16 @@ class GOAKExecutable final: public GIslandExecutable { cv::Size m_camera_size; // Backend outputs - std::vector m_out_queues; + std::unordered_map> m_out_queues; // Backend inputs std::vector> m_in_queues; + std::unordered_set> m_passthrough_copy_nodes; + // Note: dai::Pipeline should be the only one for the whole pipeline, // so there is no way to insert any non-OAK node in graph between other OAK nodes. // The only heterogeneous case possible is if we insert other backends after or before @@ -91,6 +102,14 @@ class GOAKExecutable final: public GIslandExecutable { std::unique_ptr m_device; std::unique_ptr m_pipeline; + // Camera config + cv::gapi::oak::ColorCameraParams m_ccp; + + // Infer info + std::unordered_map> m_oak_infer_info; + public: GOAKExecutable(const ade::Graph& g, const cv::GCompileArgs& args, @@ -122,6 +141,12 @@ public: std::vector& args, std::vector& results); + GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + const cv::gapi::oak::detail::ParamDesc& infer_info, + std::vector& args, + std::vector& results); + // Generic accessor API template T& inArg(int input) { return m_args.at(input).get(); } @@ -130,12 +155,14 @@ public: InputPtr& in(int input); OutputPtr& out(int output); - const std::unique_ptr& pipeline(); + const std::unique_ptr& pipeline() const; const cv::Size& camera_size() const; + const cv::gapi::oak::detail::ParamDesc& ii() const; private: const std::unique_ptr& m_pipeline; - const cv::Size& m_camera_size; + const cv::Size m_camera_size; + const cv::gapi::oak::detail::ParamDesc m_infer_info; std::vector& m_args; std::vector& m_outputs; }; @@ -144,9 +171,18 @@ GOAKContext::GOAKContext(const std::unique_ptr& pipeline, const cv::Size& camera_size, std::vector& args, std::vector& results) - : m_pipeline(pipeline), m_camera_size(camera_size), m_args(args), m_outputs(results) {} + : m_pipeline(pipeline), m_camera_size(camera_size), + m_args(args), m_outputs(results) {} -const std::unique_ptr& GOAKContext::pipeline() { +GOAKContext::GOAKContext(const std::unique_ptr& pipeline, + const cv::Size& camera_size, + const cv::gapi::oak::detail::ParamDesc& infer_info, + std::vector& args, + std::vector& results) + : m_pipeline(pipeline), m_camera_size(camera_size), + m_infer_info(infer_info), m_args(args), m_outputs(results) {} + +const std::unique_ptr& GOAKContext::pipeline() const { return m_pipeline; } @@ -154,6 +190,10 @@ const cv::Size& GOAKContext::camera_size() const { return m_camera_size; } +const cv::gapi::oak::detail::ParamDesc& GOAKContext::ii() const { + return m_infer_info; +} + GOAKContext::InputPtr& GOAKContext::in(int input) { return inArg>(input).get(); } @@ -162,6 +202,14 @@ GOAKContext::OutputPtr& GOAKContext::out(int output) { return m_outputs.at(output); } +class OAKKernelParams { +public: + const std::unique_ptr& pipeline; + const cv::Size& camera_size; + const cv::gapi::oak::detail::ParamDesc& infer_info; + std::vector>& in_queues; +}; + namespace detail { template struct get_in; template<> struct get_in { @@ -179,13 +227,10 @@ template<> struct get_out { template struct get_out> { static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } }; -// FIXME: add support of other types - -struct OAKKernelParams { - const std::unique_ptr& pipeline; - const cv::Size& camera_size; - std::vector>& m_in_queues; +template<> struct get_out { + static GOAKContext::OutputPtr& get(GOAKContext &ctx, int idx) { return ctx.out(idx); } }; +// FIXME: add support of other types template struct OAKCallHelper; @@ -200,6 +245,7 @@ struct OAKCallHelper, std::tuple > { , cv::detail::Seq) { return Impl::put(OAKKernelParams{ctx.pipeline(), ctx.camera_size(), + ctx.ii(), in_queues_params}, get_in::get(ctx, IIs)..., get_out::get(ctx, OIs)...); @@ -229,23 +275,84 @@ struct OAKComponent static const char *name() { return "OAK Component"; } GOAKKernel k; }; - -}} // namespace gimpl // namespace cv +} // namespace gimpl +} // namespace cv using OAKGraph = ade::TypedGraph - < cv::gimpl::OAKComponent - // FIXME: extend + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + // OAK specific + , cv::gimpl::OAKComponent >; using ConstOAKGraph = ade::ConstTypedGraph - < cv::gimpl::OAKComponent - // FIXME: extend + < cv::gimpl::Protocol + , cv::gimpl::Op + , cv::gimpl::NetworkParams + , cv::gimpl::CustomMetaFunction + // OAK specific + , cv::gimpl::OAKComponent >; -// This function links dai operation nodes - parent's output to child's input. +namespace +{ +std::pair +parseDaiInferMeta(const cv::gapi::oak::detail::ParamDesc& pd) { + dai::OpenVINO::Blob blob(pd.blob_file); + + GAPI_Assert(blob.networkInputs.size() == 1); + GAPI_Assert(blob.networkOutputs.size() == 1); + + return {blob.networkInputs.begin()->second, + blob.networkOutputs.begin()->second}; +} + +std::string +getDaiInferOutLayerName(const cv::gapi::oak::detail::ParamDesc& pd) { + dai::OpenVINO::Blob blob(pd.blob_file); + + GAPI_Assert(blob.networkInputs.size() == 1); + GAPI_Assert(blob.networkOutputs.size() == 1); + + return blob.networkOutputs.begin()->first; +} +} // anonymous namespace + +// Custom meta function for OAK backend for infer +static cv::GMetaArgs customOutMeta(const ade::Graph &gr, + const ade::NodeHandle &nh, + const cv::GMetaArgs &/*in_metas*/, + const cv::GArgs &/*in_args*/) { + cv::GMetaArgs result; + const auto &np = ConstOAKGraph(gr).metadata(nh).get(); + const auto &pd = cv::util::any_cast(np.opaque); + + // FIXME: Infer kernel and backend does rather the same + auto in_out_tensor_info = parseDaiInferMeta(pd); + + GAPI_Assert(in_out_tensor_info.second.dataType == + dai::TensorInfo::DataType::FP16); + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.second.order == + dai::TensorInfo::StorageOrder::NCHW); + + // FIXME: DAI returns vector, remove workaround + std::vector wrapped_dims; + for (const auto& d : in_out_tensor_info.second.dims) { + wrapped_dims.push_back(d); + } + result = {cv::GMetaArg{cv::GMatDesc(CV_16F, 1, cv::Size(wrapped_dims[1], wrapped_dims[0]), false)}}; + + return result; +} + +// This function links DAI operation nodes - parent's output to child's input. // It utilizes G-API graph to search for operation's node it's previous operation in graph -// when links them in dai graph. -void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) +// when links them in DAI graph. +void cv::gimpl::GOAKExecutable::linkToParent(ade::NodeHandle handle) { ade::NodeHandle parent; for (const auto& data_nh : handle.get()->inNodes()) { @@ -253,6 +360,13 @@ void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) GAPI_Assert(data_nh.get()->inNodes().size() == 1); parent = data_nh.get()->inNodes().front(); + // Don't link if parent is copy - the case is handled differently + // in linkCopy + const auto& op = m_gm.metadata(parent).get(); + if (op.k.name == "org.opencv.oak.copy") { + continue; + } + // Assuming that OAK nodes are aligned for linking. // FIXME: potential rework might be needed then // counterexample is found. @@ -269,6 +383,89 @@ void cv::gimpl::GOAKExecutable::LinkToParents(ade::NodeHandle handle) } } +// This function links DAI operations for Copy OP in G-API graph +void cv::gimpl::GOAKExecutable::linkCopy(ade::NodeHandle handle) { + // 1. Check that there are no back-to-back Copy OPs in graph + auto copy_out = handle.get()->outNodes(); + GAPI_Assert(copy_out.size() == 1); + for (const auto& copy_next_op : copy_out.front().get()->outNodes()) { + const auto& op = m_gm.metadata(copy_next_op).get(); + if (op.k.name == "org.opencv.oak.copy") { + GAPI_Assert(false && "Back-to-back Copy operations are not supported in graph"); + } + } + + // 2. Link passthrough case + if (m_passthrough_copy_nodes.find(handle) != m_passthrough_copy_nodes.end()) { + ExtractTypeHelper::OutputPtr parent; + bool parent_is_camera = false; + // Copy has only 1 input data + GAPI_Assert(handle.get()->inNodes().size() == 1); + auto in_ops = handle.get()->inNodes().front().get()->inNodes(); + if (in_ops.size() == 0) { + // No parent nodes - parent = camera + parent = &m_camera_input->video; + parent_is_camera = true; + } else { + // Data has only 1 input + GAPI_Assert(in_ops.size() == 1); + auto node = m_oak_nodes.at(in_ops.front()); + // Should only have 1 output + GAPI_Assert(node.outputs.size() == 1); + parent = node.outputs[0]; + } + + // Now link DAI parent output to Copy's child's inputs ignoring the Copy operation + // FIXME: simplify this loop + auto copy_out_data = handle.get()->outNodes(); + // Copy has only 1 output + GAPI_Assert(copy_out_data.size() == 1); + for (const auto& copy_next_op : copy_out_data.front().get()->outNodes()) { + if (m_oak_nodes.find(copy_next_op) != m_oak_nodes.end()) { + // FIXME: consider a better approach + if (parent_is_camera) { + if (m_oak_infer_info.find(copy_next_op) != m_oak_infer_info.end()) { + parent = &m_camera_input->preview; + } else { + parent = &m_camera_input->video; + } + } + // Found next Copy OP which needs to be linked to Copy's parent + GAPI_Assert(m_oak_nodes.at(copy_next_op).inputs.size() == 1 && + "Internal OAK nodes are not aligned for linking (Copy operation)"); + parent->link(*(m_oak_nodes.at(copy_next_op).inputs.front())); + } + } + } + + // 3. Link output Copy case + if (m_out_queues.find(handle) != m_out_queues.end()) { + // DAI XLinkOutput node + auto xout = m_out_queues[handle].xlink_output->input; + + // Find parent node + // FIXME: copypasted from case 2 above + ExtractTypeHelper::OutputPtr parent; + // Copy has only 1 input data + GAPI_Assert(handle.get()->inNodes().size() == 1); + auto in_ops = handle.get()->inNodes().front().get()->inNodes(); + if (in_ops.size() == 0) { + // No parent nodes - parent = camera + parent = &m_camera_input->video; + } else { + // Data has only 1 input + GAPI_Assert(in_ops.size() == 1); + auto node = m_oak_nodes.at(in_ops.front()); + // Should only have 1 output + GAPI_Assert(node.outputs.size() == 1); + parent = node.outputs[0]; + } + + // Link parent to xout + parent->link(xout); + } +} + cv::GArg cv::gimpl::GOAKExecutable::packInArg(const GArg &arg, std::vector& oak_ins) { @@ -298,9 +495,8 @@ void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc, std::vector& oak_outs) { switch (rc.shape) { case GShape::GFRAME: - oak_outs.push_back(nullptr); - break; case GShape::GARRAY: + case GShape::GMAT: oak_outs.push_back(nullptr); break; default: @@ -309,6 +505,33 @@ void cv::gimpl::GOAKExecutable::packOutArg(const RcDesc &rc, } } +namespace { +static dai::CameraBoardSocket extractCameraBoardSocket(cv::gapi::oak::ColorCameraParams ccp) { + switch (ccp.board_socket) { + case cv::gapi::oak::ColorCameraParams::BoardSocket::RGB: + return dai::CameraBoardSocket::RGB; + // FIXME: extend + default: + // basically unreachable + GAPI_Assert("Unsupported camera board socket"); + return {}; + } +} + +static dai::ColorCameraProperties::SensorResolution +extractCameraResolution(cv::gapi::oak::ColorCameraParams ccp) { + switch (ccp.resolution) { + case cv::gapi::oak::ColorCameraParams::Resolution::THE_1080_P: + return dai::ColorCameraProperties::SensorResolution::THE_1080_P; + // FIXME: extend + default: + // basically unreachable + GAPI_Assert("Unsupported camera board socket"); + return {}; + } +} +} // anonymous namespace + cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, const cv::GCompileArgs &args, const std::vector& nodes, @@ -344,16 +567,52 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, } } + m_ccp = cv::gimpl::getCompileArg(args) + .value_or(cv::gapi::oak::ColorCameraParams{}); + // FIXME: change the hard-coded behavior (XLinkIn path) auto camRgb = m_pipeline->create(); // FIXME: extract camera compile arguments here and properly convert them for dai - camRgb->setBoardSocket(dai::CameraBoardSocket::RGB); - camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); + camRgb->setBoardSocket(extractCameraBoardSocket(m_ccp)); + camRgb->setResolution(extractCameraResolution(m_ccp)); + camRgb->setInterleaved(m_ccp.interleaved); + + // Extract infer params + for (const auto& nh : nodes) { + if (m_gm.metadata(nh).get().t == NodeType::OP) { + if (ConstOAKGraph(m_g).metadata(nh).contains()) { + const auto &np = ConstOAKGraph(m_g).metadata(nh).get(); + const auto &pp = cv::util::any_cast(np.opaque); + m_oak_infer_info[nh] = pp; + break; + } + } + } + + // FIXME: handle multiple infers + if (!m_oak_infer_info.empty()) { + GAPI_Assert(m_oak_infer_info.size() == 1); + // FIXME: move to infer node? + auto in_out_tensor_info = parseDaiInferMeta(m_oak_infer_info.begin()->second); + + if (in_out_tensor_info.first.dataType == + dai::TensorInfo::DataType::FP16 || + in_out_tensor_info.first.dataType == + dai::TensorInfo::DataType::FP32) { + camRgb->setFp16(true); + } else { + camRgb->setFp16(false); + } + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.first.order == + dai::TensorInfo::StorageOrder::NCHW); + camRgb->setPreviewSize(in_out_tensor_info.first.dims[0], in_out_tensor_info.first.dims[1]); + } - // Set camera output. Fixme: consider working with other camera outputs m_camera_input = camRgb; // FIXME: change when other camera censors are introduced - std::tuple video_size = camRgb->getVideoSize(); + std::tuple video_size = m_camera_input->getVideoSize(); m_camera_size = cv::Size{std::get<0>(video_size), std::get<1>(video_size)}; // Prepare XLinkOut nodes for each output object in graph @@ -361,7 +620,23 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, auto xout = m_pipeline->create(); std::string xout_name = "xout" + std::to_string(i); xout->setStreamName(xout_name); - m_out_queues.push_back({xout, nullptr, xout_name}); + + // Find parent OP's nh + ade::NodeHandle parent_op_nh; + for (const auto& nh : nodes) { + for (const auto& outdata : nh.get()->outNodes()) { + if (m_gm.metadata(outdata).get().t == NodeType::DATA) { + auto rc = m_gm.metadata(outdata).get().rc; + auto shape = m_gm.metadata(outdata).get().shape; + // Match outs_data with the actual operation + if (rc == outs_data[i].rc && shape == outs_data[i].shape) { + parent_op_nh = nh; + } + } + } + } + + m_out_queues[parent_op_nh] = {xout, nullptr, xout_name, i}; } // Create OAK node for each node in this backend @@ -375,33 +650,66 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, m_oak_nodes.at(nh).inputs.reserve(op.args.size()); m_oak_nodes.at(nh).outputs.reserve(op.outs.size()); + // Copy operation in graph can fall into 3 cases: + // 1) Copy is an output of the island - + // in that case we link it to XLinkOut node from m_out_queues + // 2) Copy is between other two operations in the same OAK island - + // in that case we link its parent operation (could be camera) to + // the child one (those copy operations are placed in m_passthrough_copy_nodes) + // 3) Copy can fall into cases 1) and 2) at the same time + + // Prepare passthrough Copy operations + if (op.k.name == "org.opencv.oak.copy") { + // Copy has only 1 output + auto copy_out = nh.get()->outNodes(); + GAPI_Assert(copy_out.size() == 1); + for (const auto& copy_next_op : copy_out.front().get()->outNodes()) { + // Check that copy is a passthrough OP + if (std::find(nodes.begin(), nodes.end(), copy_next_op) != nodes.end()) { + m_passthrough_copy_nodes.insert(nh); + break; + } + } + } + std::vector in_ctx_args; in_ctx_args.reserve(op.args.size()); for (auto &op_arg : op.args) in_ctx_args.push_back(packInArg(op_arg, - m_oak_nodes.at(nh).inputs)); + m_oak_nodes.at(nh).inputs)); for (auto &&op_out : op.outs) packOutArg(op_out, m_oak_nodes.at(nh).outputs); GAPI_Assert(!m_oak_nodes.at(nh).inputs.empty()); GAPI_Assert(!m_oak_nodes.at(nh).outputs.empty()); - GOAKContext ctx(m_pipeline, m_camera_size, in_ctx_args, m_oak_nodes.at(nh).outputs); - m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); - GAPI_Assert(m_oak_nodes.at(nh).node != nullptr); + if (ConstOAKGraph(m_g).metadata(nh).contains()) { + GOAKContext ctx(m_pipeline, m_camera_size, m_oak_infer_info[nh], + in_ctx_args, m_oak_nodes.at(nh).outputs); + m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); + } else { + GOAKContext ctx(m_pipeline, m_camera_size, + in_ctx_args, m_oak_nodes.at(nh).outputs); + m_oak_nodes.at(nh).node = u.k.m_put_f(ctx, m_in_queues); + } // Check that all inputs and outputs are properly filled after constructing kernels // to then link it together // FIXME: add more logging - const auto& node = m_oak_nodes.at(nh); - if (std::any_of(node.inputs.cbegin(), node.inputs.cend(), - [](ExtractTypeHelper::InputPtr ptr) { - return ptr == nullptr; - })) { - GAPI_Assert(false && "DAI input are not set"); - } - if (std::any_of(node.outputs.cbegin(), node.outputs.cend(), - [](ExtractTypeHelper::OutputPtr ptr) { - return ptr == nullptr; - })) { - GAPI_Assert(false && "DAI outputs are not set"); + const auto& node_info = m_oak_nodes.at(nh); + // Copy operations don't set their inputs/outputs properly + if (op.k.name != "org.opencv.oak.copy") { + GAPI_Assert(node_info.node != nullptr); + if (std::any_of(node_info.inputs.cbegin(), node_info.inputs.cend(), + [](ExtractTypeHelper::InputPtr ptr) { + return ptr == nullptr; + })) { + GAPI_Assert(false && "DAI input are not set"); + } + + if (std::any_of(node_info.outputs.cbegin(), node_info.outputs.cend(), + [](ExtractTypeHelper::OutputPtr ptr) { + return ptr == nullptr; + })) { + GAPI_Assert(false && "DAI outputs are not set"); + } } } } @@ -413,15 +721,26 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, ade::HandleHasher> out_nodes; std::unordered_set> inter_nodes; + std::unordered_set> copy_nodes; // TODO: optimize this loop for (const auto& node : m_oak_nodes) { auto nh = node.first; + // Check if it's a Copy OP - will be handled differently when linking + GAPI_Assert(m_gm.metadata(nh).get().t == NodeType::OP); + const auto& op = m_gm.metadata(nh).get(); + if (op.k.name == "org.opencv.oak.copy") { + copy_nodes.insert(nh); + continue; + } + // Fill input op nodes for (const auto& d : ins_data) { for (const auto& indata : nh.get()->inNodes()) { auto rc = m_gm.metadata(indata).get().rc; - if (rc == d.rc) { + auto shape = m_gm.metadata(indata).get().shape; + if (rc == d.rc && shape == d.shape) { in_nodes.insert(nh); } } @@ -430,7 +749,8 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, for (const auto& d : outs_data) { for (const auto& outdata : nh.get()->outNodes()) { auto rc = m_gm.metadata(outdata).get().rc; - if (rc == d.rc) { + auto shape = m_gm.metadata(outdata).get().shape; + if (rc == d.rc && shape == d.shape) { out_nodes.insert(nh); } } @@ -446,42 +766,47 @@ cv::gimpl::GOAKExecutable::GOAKExecutable(const ade::Graph& g, // 1. Link input nodes to camera for (const auto& nh : in_nodes) { GAPI_Assert(m_oak_nodes.at(nh).inputs.size() == 1); - // FIXME: covert other camera outputs - m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); + // FIXME: convert other camera outputs + // Link preview to infer, video to all other nodes + if (m_oak_infer_info.find(nh) == m_oak_infer_info.end()) { + m_camera_input->video.link(*(m_oak_nodes.at(nh).inputs[0])); + } else { + m_camera_input->preview.link(*(m_oak_nodes.at(nh).inputs[0])); + } } // 2. Link output nodes to XLinkOut nodes - size_t out_counter = 0; for (const auto& nh : out_nodes) { - GAPI_Assert(out_counter + m_oak_nodes.at(nh).outputs.size() <= m_out_queues.size()); for (const auto& out : m_oak_nodes.at(nh).outputs) { - out->link(m_out_queues[out_counter++].xlink_output->input); + out->link(m_out_queues[nh].xlink_output->input); } // Input nodes in OAK doesn't have parent operation - just camera (for now) if (in_nodes.find(nh) == in_nodes.end()) { - LinkToParents(nh); + linkToParent(nh); } } // 3. Link internal nodes to their parents for (const auto& nh : inter_nodes) { - // Input nodes in OAK doesn't have parent operation - just camera (for now) - if (in_nodes.find(nh) == in_nodes.end()) { - LinkToParents(nh); - } + linkToParent(nh); + } + + // 4. Link copy nodes + for (const auto& nh : copy_nodes) { + linkCopy(nh); } m_device = std::unique_ptr(new dai::Device(*m_pipeline)); // Prepare OAK output queues GAPI_Assert(m_out_queues.size() == outs_data.size()); - for (const auto out_it : ade::util::indexed(outs_data)) + for (const auto out_it : ade::util::indexed(m_out_queues)) { - auto& q = m_out_queues[ade::util::index(out_it)]; + auto& q = ade::util::value(out_it).second; GAPI_Assert(q.out_queue == nullptr); // shouldn't be not filled till this point // FIXME: add queue parameters - // Currently: 30 - max DAI queue capacity, true - blocking queue - q.out_queue = m_device->getOutputQueue(q.out_queue_name, 30, true); + // Currently: 4 - max DAI queue capacity, true - blocking queue + q.out_queue = m_device->getOutputQueue(q.out_queue_name, 4, true); } } @@ -507,17 +832,25 @@ void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput &in, q->send(in_q.second); } - for (size_t i = 0; i < m_out_queues.size(); ++i) { - auto q = m_out_queues[i].out_queue; - // TODO: support other DAI types if needed - // Note: we utilize getData() method that returns std::vector of data - // on which we gain ownership - auto oak_frame = q->get(); + for (size_t i = 0; i < m_in_queues.size(); ++i) { + auto q = m_device->getInputQueue(m_in_queues[i].first); + q->send(m_in_queues[i].second); + } - auto out_arg = out.get(i); + for (const auto el : m_out_queues) { + const auto out_q = el.second; + auto& q = out_q.out_queue; + + auto out_arg = out.get(out_q.gapi_out_data_index); + + // FIXME: misc info to be utilized in switch below + cv::GRunArg::Meta meta; + std::shared_ptr oak_frame; switch(out_arg.index()) { case cv::GRunArgP::index_of(): + { + oak_frame = q->get(); // FIXME: hard-coded NV12 *cv::util::get(out_arg) = cv::MediaFrame::Create( @@ -525,58 +858,63 @@ void cv::gimpl::GOAKExecutable::run(GIslandExecutable::IInput &in, static_cast(oak_frame->getHeight())), cv::MediaFormat::NV12, std::move(oak_frame->getData())); + + using namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = oak_frame->getTimestamp(); + meta[seq_id] = oak_frame->getSequenceNum(); + break; + } case cv::GRunArgP::index_of(): + { + oak_frame = q->get(); cv::util::get(out_arg).wref() = std::move(oak_frame->getData()); + + using namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = oak_frame->getTimestamp(); + meta[seq_id] = oak_frame->getSequenceNum(); + break; + } + case cv::GRunArgP::index_of(): // only supported for infer + { + auto nn_data = q->get(); + + auto out_layer_name = getDaiInferOutLayerName(m_oak_infer_info.begin()->second); + auto in_out_tensor_info = parseDaiInferMeta(m_oak_infer_info.begin()->second); + + auto layer = std::move(nn_data->getLayerFp16(out_layer_name)); + + // FIXME: add proper layout converter here + GAPI_Assert(in_out_tensor_info.second.order == + dai::TensorInfo::StorageOrder::NCHW); + // FIMXE: only 1-channel data is supported for now + GAPI_Assert(in_out_tensor_info.second.dims[2] == 1); + + *cv::util::get(out_arg) = + cv::make_rmat( + cv::Size(in_out_tensor_info.second.dims[1], + in_out_tensor_info.second.dims[0]), + CV_16F, // FIXME: cover other precisions + std::move(layer) + ); + + using namespace cv::gapi::streaming::meta_tag; + meta[timestamp] = nn_data->getTimestamp(); + meta[seq_id] = nn_data->getSequenceNum(); + + break; + } // FIXME: Add support for remaining types default: GAPI_Assert(false && "Unsupported type in OAK backend"); } - using namespace cv::gapi::streaming::meta_tag; - cv::GRunArg::Meta meta; - meta[timestamp] = oak_frame->getTimestamp(); - meta[seq_id] = oak_frame->getSequenceNum(); - out.meta(out_arg, meta); out.post(std::move(out_arg)); } } -// Built-in kernels for OAK ///////////////////////////////////////////////////// - -class GOAKBackendImpl final : public cv::gapi::GBackend::Priv { - virtual void unpackKernel(ade::Graph &graph, - const ade::NodeHandle &op_node, - const cv::GKernelImpl &impl) override { - OAKGraph gm(graph); - - const auto &kimpl = cv::util::any_cast(impl.opaque); - gm.metadata(op_node).set(cv::gimpl::OAKComponent{kimpl}); - } - - virtual EPtr compile(const ade::Graph &graph, - const cv::GCompileArgs &args, - const std::vector &nodes, - const std::vector& ins_data, - const std::vector& outs_data) const override { - cv::gimpl::GModel::ConstGraph gm(graph); - // FIXME: pass streaming/non-streaming option to support non-camera case - // NB: how could we have non-OAK source in streaming mode, then OAK backend in - // streaming mode but without camera input? - if (!gm.metadata().contains()) { - GAPI_Assert(false && "OAK backend only supports Streaming mode for now"); - } - return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)}; - } -}; - -cv::gapi::GBackend cv::gapi::oak::backend() { - static cv::gapi::GBackend this_backend(std::make_shared()); - return this_backend; -} - namespace cv { namespace gimpl { namespace oak { @@ -604,10 +942,13 @@ static dai::VideoEncoderProperties::Profile convertEncProfile(cv::gapi::oak::Enc // Kernels /////////////////////////////////////////////////////////////// -template -class GOAKKernelImpl: public detail::OAKCallHelper +// FIXME: consider a better solution - hard-coded API +// Is there a way to extract API from somewhereelse/utilize structs +// like in streaming/infer backends (mainly infer and copy operations) +template +class GOAKKernelImpl: public detail::OAKCallHelper , public cv::detail::KernelTag { - using P = detail::OAKCallHelper; + using P = detail::OAKCallHelper; public: using API = K; static cv::gapi::GBackend backend() { return cv::gapi::oak::backend(); } @@ -617,17 +958,51 @@ public: #define GAPI_OAK_KERNEL(Name, API) \ struct Name: public cv::gimpl::oak::GOAKKernelImpl +#define GAPI_OAK_FIXED_API_KERNEL(Name, API, InArgs, OutArgs) \ + struct Name: public cv::gimpl::oak::GOAKKernelImpl + namespace { +GAPI_OAK_FIXED_API_KERNEL(GOAKInfer, cv::GInferBase, std::tuple, std::tuple) { + static std::shared_ptr put(const cv::gimpl::OAKKernelParams& params, + GOAKContext::InputPtr& in, + GOAKContext::OutputPtr& out) { + auto nn = params.pipeline->create(); + + nn->input.setBlocking(true); + nn->input.setQueueSize(1); + + // FIXME: add G-API built-in preproc here (currently it's only setPreviewSize() on the camera node) + // Note: for some reason currently it leads to: + // "Fatal error. Please report to developers. Log: 'ImageManipHelper' '61'" + + nn->setBlobPath(params.infer_info.blob_file); + + in = &(nn->input); + out = &(nn->out); + + return nn; + } +}; + +GAPI_OAK_KERNEL(GOAKCopy, cv::gapi::oak::GCopy) { + static std::shared_ptr put(const cv::gimpl::OAKKernelParams&, + GOAKContext::InputPtr&, + GOAKContext::OutputPtr&) { + // Do nothing in Copy OP since it's either already represented + // by XLinkOut node (bonded to output queues) or it's a passthrough OP + return nullptr; + } +}; + GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) { - static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + static std::shared_ptr put(const cv::gimpl::OAKKernelParams& params, GOAKContext::InputPtr& in, const cv::gapi::oak::EncoderConfig& cfg, GOAKContext::OutputPtr& out) { auto videoEnc = params.pipeline->create(); // FIXME: convert all the parameters to dai - videoEnc->setDefaultProfilePreset(cfg.width, cfg.height, - cfg.frameRate, + videoEnc->setDefaultProfilePreset(cfg.frameRate, convertEncProfile(cfg.profile)); in = &(videoEnc->input); @@ -638,7 +1013,7 @@ GAPI_OAK_KERNEL(GOAKEncFrame, cv::gapi::oak::GEncFrame) { }; GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { - static std::shared_ptr put(const cv::gimpl::detail::OAKKernelParams& params, + static std::shared_ptr put(const cv::gimpl::OAKKernelParams& params, GOAKContext::InputPtr& in, const cv::Mat& hk, const cv::Mat& vk, @@ -664,7 +1039,7 @@ GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { xinEdgeCfg->out.link(edgeDetector->inputConfig); - params.m_in_queues.push_back({"sobel_cfg", cfg}); + params.in_queues.push_back({"sobel_cfg", cfg}); in = &(edgeDetector->inputImage); out = &(edgeDetector->outputImage); @@ -672,11 +1047,55 @@ GAPI_OAK_KERNEL(GOAKSobelXY, cv::gapi::oak::GSobelXY) { return edgeDetector; } }; + } // anonymous namespace } // namespace oak } // namespace gimpl } // namespace cv +class GOAKBackendImpl final : public cv::gapi::GBackend::Priv { + virtual void unpackKernel(ade::Graph &graph, + const ade::NodeHandle &op_node, + const cv::GKernelImpl &impl) override { + using namespace cv::gimpl; + + OAKGraph gm(graph); + + const auto &kimpl = cv::util::any_cast(impl.opaque); + gm.metadata(op_node).set(OAKComponent{kimpl}); + + // Set custom meta for infer + if (gm.metadata(op_node).contains()) { + gm.metadata(op_node).set(CustomMetaFunction{customOutMeta}); + } + } + + virtual EPtr compile(const ade::Graph &graph, + const cv::GCompileArgs &args, + const std::vector &nodes, + const std::vector& ins_data, + const std::vector& outs_data) const override { + cv::gimpl::GModel::ConstGraph gm(graph); + // FIXME: pass streaming/non-streaming option to support non-camera case + // NB: how could we have non-OAK source in streaming mode, then OAK backend in + // streaming mode but without camera input? + if (!gm.metadata().contains()) { + GAPI_Assert(false && "OAK backend only supports Streaming mode for now"); + } + return EPtr{new cv::gimpl::GOAKExecutable(graph, args, nodes, ins_data, outs_data)}; + } + + virtual cv::GKernelPackage auxiliaryKernels() const override { + return cv::gapi::kernels< cv::gimpl::oak::GOAKInfer + >(); + } +}; + +cv::gapi::GBackend cv::gapi::oak::backend() { + static cv::gapi::GBackend this_backend(std::make_shared()); + return this_backend; +} + namespace cv { namespace gapi { namespace oak { @@ -684,6 +1103,7 @@ namespace oak { cv::gapi::GKernelPackage kernels() { return cv::gapi::kernels< cv::gimpl::oak::GOAKEncFrame , cv::gimpl::oak::GOAKSobelXY + , cv::gimpl::oak::GOAKCopy >(); } @@ -697,13 +1117,17 @@ namespace cv { namespace gapi { namespace oak { -cv::gapi::GKernelPackage kernels(); - cv::gapi::GKernelPackage kernels() { GAPI_Assert(false && "Built without OAK support"); return {}; } +cv::gapi::GBackend backend() { + GAPI_Assert(false && "Built without OAK support"); + static cv::gapi::GBackend this_backend(nullptr); + return this_backend; +} + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/oak/oak_media_adapter.hpp b/modules/gapi/src/backends/oak/oak_memory_adapters.hpp similarity index 56% rename from modules/gapi/src/backends/oak/oak_media_adapter.hpp rename to modules/gapi/src/backends/oak/oak_memory_adapters.hpp index 9c81f5a953..69677978d4 100644 --- a/modules/gapi/src/backends/oak/oak_media_adapter.hpp +++ b/modules/gapi/src/backends/oak/oak_memory_adapters.hpp @@ -10,11 +10,15 @@ #include #include +#include namespace cv { namespace gapi { namespace oak { +// Used for OAK backends outputs only. +// Filled from DepthAI's ImgFrame type and owns the memory. +// Used mainly for CV operations. class GAPI_EXPORTS OAKMediaAdapter final : public cv::MediaFrame::IAdapter { public: OAKMediaAdapter() = default; @@ -28,6 +32,24 @@ private: std::vector m_buffer; }; +// Used for OAK backends outputs only. +// Filled from DepthAI's NNData type and owns the memory. +// Used only for infer operations. +class GAPI_EXPORTS OAKRMatAdapter final : public cv::RMat::Adapter { +public: + OAKRMatAdapter() = default; + OAKRMatAdapter(const cv::Size& size, int precision, std::vector&& buffer); + cv::GMatDesc desc() const override; + cv::RMat::View access(cv::RMat::Access) override; + ~OAKRMatAdapter() = default; +private: + cv::Size m_size; + int m_precision; + std::vector m_buffer; + cv::GMatDesc m_desc; + cv::Mat m_mat; +}; + } // namespace oak } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp index 2ab6610378..af1f7f8948 100644 --- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -1102,7 +1102,7 @@ struct InferList2: public cv::detail::KernelTag { } else { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - // }}} (Preapre input) + // }}} (Prepare input) } // }}} (For every input of the net) std::vector out_mats(uu.oc->numOutputs()); diff --git a/modules/gapi/src/backends/render/ft_render.cpp b/modules/gapi/src/backends/render/ft_render.cpp index fcf84713ff..3c4ae2077f 100644 --- a/modules/gapi/src/backends/render/ft_render.cpp +++ b/modules/gapi/src/backends/render/ft_render.cpp @@ -80,7 +80,7 @@ cv::Size cv::gapi::wip::draw::FTTextRender::Priv::getTextSize(const std::wstring // See (1) on picture. // // 4) As we can see the last pen position is isn't horizontal size yet. - // We need to check if the glyph goes beyound the last position of the pen + // We need to check if the glyph goes beyond the last position of the pen // To do this we can: // a) Return to the previous position -advance // b) Shift on left value +left diff --git a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp index 4bd2a10ea5..69b5f6c72b 100644 --- a/modules/gapi/src/backends/streaming/gstreamingbackend.cpp +++ b/modules/gapi/src/backends/streaming/gstreamingbackend.cpp @@ -172,6 +172,7 @@ void Copy::Actor::run(cv::gimpl::GIslandExecutable::IInput &in, return; } + GAPI_DbgAssert(cv::util::holds_alternative(in_msg)); const cv::GRunArgs &in_args = cv::util::get(in_msg); GAPI_Assert(in_args.size() == 1u); @@ -212,6 +213,7 @@ public: return; } + GAPI_Assert(cv::util::holds_alternative(in_msg)); const cv::GRunArgs &in_args = cv::util::get(in_msg); GAPI_Assert(in_args.size() == 1u); auto frame = cv::util::get(in_args[0]); diff --git a/modules/gapi/src/compiler/gislandmodel.cpp b/modules/gapi/src/compiler/gislandmodel.cpp index 1a8e0939e2..0567a90e3a 100644 --- a/modules/gapi/src/compiler/gislandmodel.cpp +++ b/modules/gapi/src/compiler/gislandmodel.cpp @@ -346,9 +346,9 @@ std::string GIslandModel::traceIslandName(const ade::NodeHandle& island_nh, cons auto& backend_impl = island_ptr->backend().priv(); std::string backend_impl_type_name = typeid(backend_impl).name(); - // NOTE: Major part of already existing backends implementaion classes are called using + // NOTE: Major part of already existing backends implementation classes are called using // "*G[Name]BackendImpl*" pattern. - // We are trying to match against this pattern and retrive just [Name] part. + // We are trying to match against this pattern and retrieve just [Name] part. // If matching isn't successful, full mangled class name will be used. // // To match we use following algorithm: @@ -412,7 +412,17 @@ void GIslandExecutable::run(GIslandExecutable::IInput &in, GIslandExecutable::IO out_objs.emplace_back(ade::util::value(it), out.get(ade::util::checked_cast(ade::util::index(it)))); } - run(std::move(in_objs), std::move(out_objs)); + + try { + run(std::move(in_objs), std::move(out_objs)); + } catch (...) { + auto eptr = std::current_exception(); + for (auto &&it: out_objs) + { + out.post(std::move(it.second), eptr); + } + return; + } // Propagate in-graph meta down to the graph // Note: this is not a complete implementation! Mainly this is a stub diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 063504a922..565b3c4f21 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -161,7 +161,12 @@ public: const std::vector &desc() const { return d; } }; struct EndOfStream {}; -using StreamMsg = cv::util::variant; + +struct Exception { + std::exception_ptr eptr; +}; + +using StreamMsg = cv::util::variant; struct GIslandExecutable::IInput: public GIslandExecutable::IODesc { virtual ~IInput() = default; virtual StreamMsg get() = 0; // Get a new input vector (blocking) @@ -169,9 +174,11 @@ struct GIslandExecutable::IInput: public GIslandExecutable::IODesc { }; struct GIslandExecutable::IOutput: public GIslandExecutable::IODesc { virtual ~IOutput() = default; - virtual GRunArgP get(int idx) = 0; // Allocate (wrap) a new data object for output idx - virtual void post(GRunArgP&&) = 0; // Release the object back to the framework (mark available) - virtual void post(EndOfStream&&) = 0; // Post end-of-stream marker back to the framework + virtual GRunArgP get(int idx) = 0; // Allocate (wrap) a new data object for output idx + virtual void post(GRunArgP&&, const std::exception_ptr& = {}) = 0; // Release the object back to the framework (mark available) + virtual void post(EndOfStream&&) = 0; // Post end-of-stream marker back to the framework + virtual void post(Exception&&) = 0; + // Assign accumulated metadata to the given output object. // This method can only be called after get() and before post(). diff --git a/modules/gapi/src/compiler/gmodel.hpp b/modules/gapi/src/compiler/gmodel.hpp index d016766fb5..592678dae7 100644 --- a/modules/gapi/src/compiler/gmodel.hpp +++ b/modules/gapi/src/compiler/gmodel.hpp @@ -70,7 +70,7 @@ struct Data // FIXME: This is a _pure_ duplication of RcDesc now! (except storage) GShape shape; // FIXME: Probably to be replaced by GMetaArg? - int rc; + int rc; // rc is unique but local to shape GMetaArg meta; HostCtor ctor; // T-specific helper to deal with unknown types in our code cv::detail::OpaqueKind kind; // FIXME: is needed to store GArray/GOpaque type diff --git a/modules/gapi/src/compiler/passes/intrin.cpp b/modules/gapi/src/compiler/passes/intrin.cpp index 56f2db69e0..8920be6d4e 100644 --- a/modules/gapi/src/compiler/passes/intrin.cpp +++ b/modules/gapi/src/compiler/passes/intrin.cpp @@ -254,7 +254,7 @@ void apply(cv::gimpl::GModel::Graph &g) { } // Probably the simplest case: desync makes no sense in the regular -// compilation process, so just drop all its occurences in the graph, +// compilation process, so just drop all its occurrences in the graph, // reconnecting nodes properly. void drop(cv::gimpl::GModel::Graph &g) { // FIXME: LOG here that we're dropping the desync operations as diff --git a/modules/gapi/src/compiler/passes/kernels.cpp b/modules/gapi/src/compiler/passes/kernels.cpp index 2776c60359..22b791650f 100644 --- a/modules/gapi/src/compiler/passes/kernels.cpp +++ b/modules/gapi/src/compiler/passes/kernels.cpp @@ -212,7 +212,7 @@ void cv::gimpl::passes::resolveKernels(ade::passes::PassContext &ctx, GAPI_Assert(op.k.outMeta == nullptr); const_cast(op.k.outMeta) = selected_impl.outMeta; } else { - // Sanity check: the metadata funciton must be present + // Sanity check: the metadata function must be present GAPI_Assert(op.k.outMeta != nullptr); } } diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index 6c15d1dfc9..472abaaa14 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -30,10 +30,11 @@ cv::gimpl::GExecutor::GExecutor(std::unique_ptr &&g_model) // 1. Allocate all internal resources first (NB - CPU plugin doesn't do it) // 2. Put input/output GComputation arguments to the storage // 3. For every Island, prepare vectors of input/output parameter descs - // 4. Iterate over a list of operations (sorted in the topological order) - // 5. For every operation, form a list of input/output data objects - // 6. Run GIslandExecutable - // 7. writeBack + // 4. Ask every GIslandExecutable to prepare its internal states for a new stream + // 5. Iterate over a list of operations (sorted in the topological order) + // 6. For every operation, form a list of input/output data objects + // 7. Run GIslandExecutable + // 8. writeBack auto sorted = m_gim.metadata().get(); for (auto nh : sorted.nodes()) @@ -82,6 +83,9 @@ cv::gimpl::GExecutor::GExecutor(std::unique_ptr &&g_model) break; } // switch(kind) } // for(gim nodes) + + // (4) + prepareForNewStream(); } namespace cv { @@ -149,7 +153,7 @@ void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) { case GRunArgP::index_of() : { // If there is a copy intrinsic at the end of the graph - // we need to actualy copy the data to the user buffer + // we need to actually copy the data to the user buffer // since output runarg was optimized to simply point // to the input of the copy kernel // FIXME: @@ -270,6 +274,7 @@ class cv::gimpl::GExecutor::Output final: public cv::gimpl::GIslandExecutable::I { cv::gimpl::Mag &mag; std::unordered_map out_idx; + std::exception_ptr eptr; GRunArgP get(int idx) override { @@ -278,8 +283,18 @@ class cv::gimpl::GExecutor::Output final: public cv::gimpl::GIslandExecutable::I out_idx[cv::gimpl::proto::ptr(r)] = idx; return r; } - void post(GRunArgP&&) override { } // Do nothing here + void post(GRunArgP&&, const std::exception_ptr& e) override + { + if (e) + { + eptr = e; + } + } void post(EndOfStream&&) override {} // Do nothing here too + void post(Exception&& ex) override + { + eptr = std::move(ex.eptr); + } void meta(const GRunArgP &out, const GRunArg::Meta &m) override { const auto idx = out_idx.at(cv::gimpl::proto::ptr(out)); @@ -291,6 +306,14 @@ public: { set(rcs); } + + void verify() + { + if (eptr) + { + std::rethrow_exception(eptr); + } + } }; void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) @@ -382,16 +405,18 @@ void cv::gimpl::GExecutor::run(cv::gimpl::GRuntimeArgs &&args) magazine::resetInternalData(m_res, data); } - // Run the script + // Run the script (5) for (auto &op : m_ops) { - // (5), (6) + // (6), (7) Input i{m_res, op.in_objects}; Output o{m_res, op.out_objects}; op.isl_exec->run(i, o); + // NB: Check if execution finished without exception. + o.verify(); } - // (7) + // (8) for (auto it : ade::util::zip(ade::util::toRange(proto.outputs), ade::util::toRange(args.outObjs))) { diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index a3a2746acc..557e5ceee4 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -31,6 +31,8 @@ #include #include +#include + namespace { using namespace cv::gimpl::stream; @@ -310,14 +312,13 @@ class QueueReader const std::size_t this_id); public: - bool getInputVector (std::vector &in_queues, - cv::GRunArgs &in_constants, - cv::GRunArgs &isl_inputs); + cv::gimpl::StreamMsg getInputVector (std::vector &in_queues, + cv::GRunArgs &in_constants); - bool getResultsVector(std::vector &in_queues, - const std::vector &in_mapping, - const std::size_t out_size, - cv::GRunArgs &out_results); + using V = cv::util::variant; + V getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size); }; void rewindToStop(std::vector &in_queues, @@ -369,9 +370,8 @@ void QueueReader::rewindToStop(std::vector &in_queues, ::rewindToStop(in_queues, this_id); } -bool QueueReader::getInputVector(std::vector &in_queues, - cv::GRunArgs &in_constants, - cv::GRunArgs &isl_inputs) +cv::gimpl::StreamMsg QueueReader::getInputVector(std::vector &in_queues, + cv::GRunArgs &in_constants) { // NB: Need to release resources from the previous step, to fetch new ones. // On some systems it might be impossible to allocate new memory @@ -381,72 +381,98 @@ bool QueueReader::getInputVector(std::vector &in_queues, // lifetime, keep the whole cmd vector (of size == # of inputs) // in memory. m_cmd.resize(in_queues.size()); - isl_inputs.resize(in_queues.size()); + cv::GRunArgs isl_inputs(in_queues.size()); + cv::optional exception; for (auto &&it : ade::util::indexed(in_queues)) { - auto id = ade::util::index(it); - auto &q = ade::util::value(it); + auto id = ade::util::index(it); + auto &q = ade::util::value(it); - if (q == nullptr) - { - GAPI_Assert(!in_constants.empty()); - // NULL queue means a graph-constant value (like a - // value-initialized scalar) - // It can also hold a constant value received with - // Stop::Kind::CNST message (see above). - isl_inputs[id] = in_constants[id]; - continue; - } + if (q == nullptr) + { + GAPI_Assert(!in_constants.empty()); + // NULL queue means a graph-constant value (like a + // value-initialized scalar) + // It can also hold a constant value received with + // Stop::Kind::CNST message (see above). + isl_inputs[id] = in_constants[id]; + continue; + } - q->pop(m_cmd[id]); - if (!cv::util::holds_alternative(m_cmd[id])) - { - isl_inputs[id] = cv::util::get(m_cmd[id]); - } - else // A Stop sign - { - const auto &stop = cv::util::get(m_cmd[id]); - if (stop.kind == Stop::Kind::CNST) - { - // We've got a Stop signal from a const source, - // propagated as a result of real stream reaching its - // end. Sometimes these signals come earlier than - // real EOS Stops so are deprioritized -- just - // remember the Const value here and continue - // processing other queues. Set queue pointer to - // nullptr and update the const_val vector - // appropriately - m_finishing = true; - in_queues[id] = nullptr; - in_constants.resize(in_queues.size()); - in_constants[id] = std::move(stop.cdata); + q->pop(m_cmd[id]); + switch (m_cmd[id].index()) + { + case Cmd::index_of(): + isl_inputs[id] = cv::util::get(m_cmd[id]); + break; + case Cmd::index_of(): + { + const auto &stop = cv::util::get(m_cmd[id]); + if (stop.kind == Stop::Kind::CNST) + { + // We've got a Stop signal from a const source, + // propagated as a result of real stream reaching its + // end. Sometimes these signals come earlier than + // real EOS Stops so are deprioritized -- just + // remember the Const value here and continue + // processing other queues. Set queue pointer to + // nullptr and update the const_val vector + // appropriately + m_finishing = true; + in_queues[id] = nullptr; + in_constants.resize(in_queues.size()); + in_constants[id] = std::move(stop.cdata); - // NEXT time (on a next call to getInputVector()), the - // "q==nullptr" check above will be triggered, but now - // we need to make it manually: - isl_inputs[id] = in_constants[id]; - } - else - { - GAPI_Assert(stop.kind == Stop::Kind::HARD); - rewindToStop(in_queues, id); - // After queues are read to the proper indicator, - // indicate end-of-stream - return false; - } // if(Cnst) - } // if(Stop) + // NEXT time (on a next call to getInputVector()), the + // "q==nullptr" check above will be triggered, but now + // we need to make it manually: + isl_inputs[id] = in_constants[id]; + } + else + { + GAPI_Assert(stop.kind == Stop::Kind::HARD); + rewindToStop(in_queues, id); + // After queues are read to the proper indicator, + // indicate end-of-stream + return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; + } // if(Cnst) + break; + } + case Cmd::index_of(): + { + exception = + cv::util::make_optional(cv::util::get(m_cmd[id])); + break; + } + default: + GAPI_Assert(false && "Unsupported cmd type in getInputVector()"); + } } // for(in_queues) + if (exception.has_value()) { + return cv::gimpl::StreamMsg{exception.value()}; + } + if (m_finishing) { // If the process is about to end (a soft Stop was received // already) and an island has no other inputs than constant // inputs, its queues may all become nullptrs. Indicate it as // "no data". - return !ade::util::all_of(in_queues, [](Q *ptr){return ptr == nullptr;}); + if (ade::util::all_of(in_queues, [](Q *ptr){return ptr == nullptr;})) { + return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; + } } - return true; // A regular case - there is data to process. + // A regular case - there is data to process + for (auto& arg : isl_inputs) { + if (arg.index() == cv::GRunArg::index_of()) { + arg = cv::GRunArg{ cv::make_rmat(cv::util::get(arg)) + , arg.meta + }; + } + } + return cv::gimpl::StreamMsg{std::move(isl_inputs)}; } // This is a special method to obtain a result vector @@ -474,33 +500,47 @@ bool QueueReader::getInputVector(std::vector &in_queues, // (_may be_ partially filled) to the same final output queue. // The receiver part at the GStreamingExecutor level won't change // because of that. -bool QueueReader::getResultsVector(std::vector &in_queues, - const std::vector &in_mapping, - const std::size_t out_size, - cv::GRunArgs &out_results) + +QueueReader::V QueueReader::getResultsVector(std::vector &in_queues, + const std::vector &in_mapping, + const std::size_t out_size) { + cv::GRunArgs out_results(out_size); m_cmd.resize(out_size); + cv::optional exception; for (auto &&it : ade::util::indexed(in_queues)) { auto ii = ade::util::index(it); auto oi = in_mapping[ii]; auto &q = ade::util::value(it); q->pop(m_cmd[oi]); - if (!cv::util::holds_alternative(m_cmd[oi])) - { - out_results[oi] = std::move(cv::util::get(m_cmd[oi])); - } - else // A Stop sign - { - // In theory, the CNST should never reach here. - // Collector thread never handles the inputs directly - // (collector's input queues are always produced by - // islands in the graph). - rewindToStop(in_queues, ii); - return false; - } // if(Stop) + + switch (m_cmd[oi].index()) { + case Cmd::index_of(): + out_results[oi] = std::move(cv::util::get(m_cmd[oi])); + break; + case Cmd::index_of(): + // In theory, the CNST should never reach here. + // Collector thread never handles the inputs directly + // (collector's input queues are always produced by + // islands in the graph). + rewindToStop(in_queues, ii); + return QueueReader::V(Stop{}); + case Cmd::index_of(): + exception = + cv::util::make_optional(cv::util::get(m_cmd[oi])); + break; + default: + cv::util::throw_error( + std::logic_error("Unexpected cmd kind in getResultsVector")); + } // switch } // for(in_queues) - return true; + + if (exception.has_value()) { + return QueueReader::V(exception.value()); + } + + return QueueReader::V(out_results); } @@ -521,7 +561,9 @@ void emitterActorThread(std::shared_ptr emitter, || cv::util::holds_alternative(cmd)); if (cv::util::holds_alternative(cmd)) { - for (auto &&oq : out_queues) oq->push(cmd); + for (auto &&oq : out_queues) { + oq->push(cmd); + } return; } @@ -547,10 +589,21 @@ void emitterActorThread(std::shared_ptr emitter, // Try to obtain next data chunk from the source cv::GRunArg data; - const bool result = [&](){ - GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl); - return emitter->pull(data); - }(); + bool result = false; + try { + result = [&](){ + GAPI_ITT_AUTO_TRACE_GUARD(emitter_pull_hndl); + return emitter->pull(data); + }(); + } catch (...) { + auto eptr = std::current_exception(); + for (auto &&oq : out_queues) + { + oq->push(Cmd{cv::gimpl::Exception{eptr}}); + } + // NB: Go to the next iteration. + continue; + } if (result) { @@ -673,28 +726,8 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput std::vector &in_queues; // FIXME: This can be part of QueueReader cv::GRunArgs &in_constants; // FIXME: This can be part of QueueReader - virtual cv::gimpl::StreamMsg get() override - { - GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get"); - GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + cv::optional last_read_msg; - cv::GRunArgs isl_input_args; - - if (!qr.getInputVector(in_queues, in_constants, isl_input_args)) - { - // Stop case - return cv::gimpl::StreamMsg{cv::gimpl::EndOfStream{}}; - } - // Wrap all input cv::Mats with RMats - for (auto& arg : isl_input_args) { - if (arg.index() == cv::GRunArg::index_of()) { - arg = cv::GRunArg{ cv::make_rmat(cv::util::get(arg)) - , arg.meta - }; - } - } - return cv::gimpl::StreamMsg{std::move(isl_input_args)}; - } virtual cv::gimpl::StreamMsg try_get() override { // FIXME: This is not very usable at the moment! @@ -709,17 +742,43 @@ class StreamingInput final: public cv::gimpl::GIslandExecutable::IInput { set(in_descs); } + + const cv::gimpl::StreamMsg& read() + { + GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::read"); + GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + + last_read_msg = + cv::optional( + qr.getInputVector(in_queues, in_constants)); + return last_read_msg.value(); + } + + virtual cv::gimpl::StreamMsg get() override + { + GAPI_ITT_STATIC_LOCAL_HANDLE(inputs_get_hndl, "StreamingInput::get"); + GAPI_ITT_AUTO_TRACE_GUARD(inputs_get_hndl); + + if (!last_read_msg.has_value()) { + (void)read(); + } + auto msg = std::move(last_read_msg.value()); + last_read_msg = cv::optional(); + return msg; + } }; class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput { // These objects form an internal state of the StreamingOutput struct Posting - { - using V = cv::util::variant; - V data; - bool ready = false; - }; + { + using V = cv::util::variant; + V data; + bool ready = false; + }; using PostingList = std::list; std::vector m_postings; std::unordered_map< const void* @@ -820,7 +879,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput return ret_val; } - virtual void post(cv::GRunArgP&& argp) override + virtual void post(cv::GRunArgP&& argp, const std::exception_ptr& exptr) override { GAPI_ITT_STATIC_LOCAL_HANDLE(outputs_post_hndl, "StreamingOutput::post"); GAPI_ITT_AUTO_TRACE_GUARD(outputs_post_hndl); @@ -834,6 +893,9 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput const int out_idx = it->second.first; const auto out_iter = it->second.second; out_iter->ready = true; + if (exptr) { + out_iter->data = cv::gimpl::Exception{exptr}; + } m_postIdx.erase(it); // Drop the link from the cache anyway if (out_iter != m_postings[out_idx].begin()) { @@ -845,16 +907,22 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput while (post_iter != m_postings[out_idx].end() && post_iter->ready == true) { Cmd cmd; - if (cv::util::holds_alternative(post_iter->data)) + switch (post_iter->data.index()) { - cmd = Cmd{cv::util::get(post_iter->data)}; - } - else - { - GAPI_Assert(cv::util::holds_alternative(post_iter->data)); - cmd = Cmd{Stop{}}; - m_stops_sent++; + case Posting::V::index_of(): + cmd = Cmd{cv::util::get(post_iter->data)}; + break; + case Posting::V::index_of(): + cmd = Cmd{cv::util::get(post_iter->data)}; + break; + case Posting::V::index_of(): + cmd = Cmd{Stop{}}; + m_stops_sent++; + break; + default: + GAPI_Assert(false && "Unreachable code"); } + for (auto &&q : m_out_queues[out_idx]) { q->push(cmd); @@ -889,6 +957,7 @@ class StreamingOutput final: public cv::gimpl::GIslandExecutable::IOutput } } } + void meta(const cv::GRunArgP &out, const cv::GRunArg::Meta &m) override { std::lock_guard lock{m_mutex}; @@ -919,6 +988,32 @@ public: // when it posted/resent all STOP messages to all its outputs. return m_stops_sent == desc().size(); } + + virtual void post(cv::gimpl::Exception&& error) override + { + std::lock_guard lock{m_mutex}; + // If the posting list is empty, just broadcast the stop message. + // If it is not, enqueue the Stop message in the postings list. + for (auto &&it : ade::util::indexed(m_postings)) + { + const auto idx = ade::util::index(it); + auto &lst = ade::util::value(it); + if (lst.empty()) + { + for (auto &&q : m_out_queues[idx]) + { + q->push(Cmd(std::move(error))); + } + } + else + { + Posting p; + p.data = Posting::V{std::move(error)}; + p.ready = true; + lst.push_back(std::move(p)); // FIXME: For some reason {}-ctor didn't work here + } + } + } }; // This thread is a plain dumb processing actor. What it do is just: @@ -947,7 +1042,17 @@ void islandActorThread(std::vector in_rcs, while (!output.done()) { GAPI_ITT_AUTO_TRACE_GUARD(island_hndl); - island_exec->run(input, output); + // NB: In case the input message is an cv::gimpl::Exception + // handle it in a general way. + if (cv::util::holds_alternative(input.read())) + { + auto in_msg = input.get(); + output.post(std::move(cv::util::get(in_msg))); + } + else + { + island_exec->run(input, output); + } } } @@ -984,26 +1089,33 @@ void collectorThread(std::vector in_queues, while (true) { GAPI_ITT_AUTO_TRACE_GUARD(collector_hndl); - cv::GRunArgs this_result(out_size); - const bool ok = [&](){ + const auto result = [&](){ GAPI_ITT_AUTO_TRACE_GUARD(collector_get_results_hndl); - return qr.getResultsVector(in_queues, in_mapping, out_size, this_result); + return qr.getResultsVector(in_queues, in_mapping, out_size); }(); - if (!ok) + switch (result.index()) { - if (handle_stop) + case QueueReader::V::index_of(): { - out_queue.push(Cmd{Stop{}}); + GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl); + auto this_result = cv::util::get(result); + out_queue.push(Cmd{Result{std::move(this_result), flags}}); + break; } - // Terminate the thread anyway - return; - } - - { - GAPI_ITT_AUTO_TRACE_GUARD(collector_push_hndl); - out_queue.push(Cmd{Result{std::move(this_result), flags}}); + case QueueReader::V::index_of(): + if (handle_stop) + { + out_queue.push(Cmd{Stop{}}); + } + // Terminate the thread anyway + return; + case QueueReader::V::index_of(): + out_queue.push(Cmd{cv::util::get(result)}); + break; + default: + GAPI_Assert(false && "Unreachable code"); } } } @@ -1408,7 +1520,7 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && cv::gimpl::GStreamingExecutor::~GStreamingExecutor() { - // FIXME: this is a temporary try-catch exception hadling. + // FIXME: this is a temporary try-catch exception handling. // Need to eliminate throwings from stop() try { if (state == State::READY || state == State::RUNNING) @@ -1452,7 +1564,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) } } }; - bool islandsRecompiled = false; + const auto new_meta = cv::descr_of(ins); // 0 if (gm.metadata().contains()) // (1) { @@ -1474,8 +1586,6 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) } update_int_metas(); // (7) m_reshapable = util::make_optional(is_reshapable); - - islandsRecompiled = true; } else // (8) { @@ -1509,7 +1619,7 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) case T::index_of(): #if !defined(GAPI_STANDALONE) emitter.reset(new VideoEmitter{emit_arg}); - // Currently all video inputs are syncronized if sync policy is to drop, + // Currently all video inputs are synchronized if sync policy is to drop, // there is no different fps branches etc, so all video emitters are registered video_emitters.emplace_back(emit_nh); #else @@ -1597,14 +1707,8 @@ void cv::gimpl::GStreamingExecutor::setSource(GRunArgs &&ins) island_meta_info = GIslandModel::traceIslandName(op.nh, m_gim); #endif // OPENCV_WITH_ITT - // If Island Executable is recompiled, all its stuff including internal kernel states - // are recreated and re-initialized automatically. - // But if not, we should notify Island Executable about new started stream to let it update - // its internal variables. - if (!islandsRecompiled) - { - op.isl_exec->handleNewStream(); - } + // Notify island executable about a new stream to let it update its internal variables. + op.isl_exec->handleNewStream(); m_threads.emplace_back(islandActorThread, op.in_objects, @@ -1707,16 +1811,24 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GRunArgsP &&outs) Cmd cmd; m_out_queue.pop(cmd); - if (cv::util::holds_alternative(cmd)) - { - wait_shutdown(); - return false; + switch (cmd.index()) { + case Cmd::index_of(): + wait_shutdown(); + return false; + case Cmd::index_of(): { + GAPI_Assert(cv::util::holds_alternative(cmd)); + cv::GRunArgs &this_result = cv::util::get(cmd).args; + sync_data(this_result, outs); + return true; + } + case Cmd::index_of(): { + std::rethrow_exception(cv::util::get(cmd).eptr); + return true; + } + default: + GAPI_Assert(false && "Unsupported cmd type in pull"); } - - GAPI_Assert(cv::util::holds_alternative(cmd)); - cv::GRunArgs &this_result = cv::util::get(cmd).args; - sync_data(this_result, outs); - return true; + GAPI_Assert(false && "Unreachable code"); } bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) @@ -1734,15 +1846,20 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) Cmd cmd; m_out_queue.pop(cmd); - if (cv::util::holds_alternative(cmd)) - { - wait_shutdown(); - return false; + switch (cmd.index()) { + case Cmd::index_of(): + wait_shutdown(); + return false; + case Cmd::index_of(): { + sync_data(cv::util::get(cmd), outs); + return true; + } + case Cmd::index_of(): { + std::rethrow_exception(cv::util::get(cmd).eptr); + return true; + } } - - GAPI_Assert(cv::util::holds_alternative(cmd)); - sync_data(cv::util::get(cmd), outs); - return true; + GAPI_Assert(false && "Unreachable code"); } std::tuple> cv::gimpl::GStreamingExecutor::pull() diff --git a/modules/gapi/src/executor/gstreamingexecutor.hpp b/modules/gapi/src/executor/gstreamingexecutor.hpp index b4aadcbbaf..da27f6a646 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.hpp +++ b/modules/gapi/src/executor/gstreamingexecutor.hpp @@ -50,11 +50,12 @@ struct Result { using Cmd = cv::util::variant < cv::util::monostate - , Start // Tells emitters to start working. Not broadcasted to workers. - , Stop // Tells emitters to stop working. Broadcasted to workers. - , cv::GRunArg // Workers data payload to process. - , Result // Pipeline's data for gout() - >; + , Start // Tells emitters to start working. Not broadcasted to workers. + , Stop // Tells emitters to stop working. Broadcasted to workers. + , cv::GRunArg // Workers data payload to process. + , Result // Pipeline's data for gout() + , cv::gimpl::Exception // Exception which is thrown while execution. + >; // Interface over a queue. The underlying queue implementation may be // different. This class is mainly introduced to bring some diff --git a/modules/gapi/src/executor/gtbbexecutor.cpp b/modules/gapi/src/executor/gtbbexecutor.cpp index cc6ccf9ef4..64cd9b5313 100644 --- a/modules/gapi/src/executor/gtbbexecutor.cpp +++ b/modules/gapi/src/executor/gtbbexecutor.cpp @@ -353,7 +353,7 @@ namespace graph { } ctx.executed++; - // reset dependecy_count to initial state to simplify re-execution of the same graph + // reset dependency_count to initial state to simplify re-execution of the same graph node->dependency_count = node->dependencies; return result; diff --git a/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp b/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp index cd782537ca..d0f5bd5db2 100644 --- a/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp +++ b/modules/gapi/src/streaming/gstreamer/gstreamer_pipeline_facade.cpp @@ -111,11 +111,11 @@ GstElement* GStreamerPipelineFacade::getElementByName(const std::string& element void GStreamerPipelineFacade::completePreroll() { // FIXME: If there are multiple sources in pipeline and one of them is live, then pipeline // will return GST_STATE_CHANGE_NO_PREROLL while pipeline pausing. - // But appsink may not be connected to this live source and only to anothers, + // But appsink may not be connected to this live source and only to another, // not-live ones. So, it is not required to start the playback for appsink to complete // the preroll. // Starting of playback for the not-live sources before the first frame pull will lead - // to loosing of some amount of frames and pulling of the first frame can return frame + // to losing of some amount of frames and pulling of the first frame can return frame // which is far from the first. // // Need to handle this case or forbid to mix multiples sources of different diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp index ad0e5bf667..0a5f8f4a35 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.cpp @@ -273,7 +273,7 @@ size_t VPLCPUAccelerationPolicy::get_surface_count(pool_key_t key) const { } cv::MediaFrame::AdapterPtr VPLCPUAccelerationPolicy::create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) { + const FrameConstructorArgs ¶ms) { auto pool_it = pool_table.find(key); if (pool_it == pool_table.end()) { std::stringstream ss; @@ -284,7 +284,8 @@ cv::MediaFrame::AdapterPtr VPLCPUAccelerationPolicy::create_frame_adapter(pool_k } pool_t& requested_pool = pool_it->second; - return cv::MediaFrame::AdapterPtr{new VPLMediaFrameCPUAdapter(requested_pool.find_by_handle(surface))}; + return cv::MediaFrame::AdapterPtr{new VPLMediaFrameCPUAdapter(requested_pool.find_by_handle(params.assoc_surface), + params.assoc_handle)}; } } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp index 8a2061dce0..57c14ad792 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_cpu.hpp @@ -38,7 +38,7 @@ struct GAPI_EXPORTS VPLCPUAccelerationPolicy final : public VPLAccelerationPolic size_t get_surface_count(pool_key_t key) const override; cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) override; + const FrameConstructorArgs& args) override; private: std::map pool_table; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp index 02720f3774..932d7d7842 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.cpp @@ -157,12 +157,21 @@ size_t VPLDX11AccelerationPolicy::get_free_surface_count(pool_key_t) const { GAPI_Assert(false && "get_free_surface_count() is not implemented"); } -size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t) const { - GAPI_Assert(false && "VPLDX11AccelerationPolicy::get_surface_count() is not implemented"); +size_t VPLDX11AccelerationPolicy::get_surface_count(pool_key_t key) const { + auto pool_it = pool_table.find(key); + if (pool_it == pool_table.end()) { + std::stringstream ss; + ss << "key is not found: " << key << ", table size: " << pool_table.size(); + const std::string& str = ss.str(); + GAPI_LOG_WARNING(nullptr, str); + throw std::runtime_error(std::string(__FUNCTION__) + " - " + str); + } + return pool_it->second.total_size(); } -cv::MediaFrame::AdapterPtr VPLDX11AccelerationPolicy::create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) { +cv::MediaFrame::AdapterPtr +VPLDX11AccelerationPolicy::create_frame_adapter(pool_key_t key, + const FrameConstructorArgs ¶ms) { auto pool_it = pool_table.find(key); if (pool_it == pool_table.end()) { std::stringstream ss; @@ -173,7 +182,8 @@ cv::MediaFrame::AdapterPtr VPLDX11AccelerationPolicy::create_frame_adapter(pool_ } pool_t& requested_pool = pool_it->second; - return cv::MediaFrame::AdapterPtr{new VPLMediaFrameDX11Adapter(requested_pool.find_by_handle(surface))}; + return cv::MediaFrame::AdapterPtr{new VPLMediaFrameDX11Adapter(requested_pool.find_by_handle(params.assoc_surface), + params.assoc_handle)}; } mfxStatus VPLDX11AccelerationPolicy::alloc_cb(mfxHDL pthis, mfxFrameAllocRequest *request, @@ -283,12 +293,28 @@ mfxStatus VPLDX11AccelerationPolicy::on_alloc(const mfxFrameAllocRequest *reques desc.BindFlags = 0; } + /* NB: + * On the one hand current OpenVINO API doesn't support texture array and + * D3D11 API doesn't allow to address specific texture element in array. + * On the other hand using textures array should be more performant case + * in applications (according to community experience) + * So, to be compliant with OV let's turn off textures array feature, but keep + * this code in commented section to consider such "optimization" in future + */ +#if 0 size_t main_textures_count = 1; if (D3D11_BIND_RENDER_TARGET & desc.BindFlags) { GAPI_LOG_DEBUG(nullptr, "Use array of testures instead of texture array"); desc.ArraySize = 1; main_textures_count = request->NumFrameSuggested; } +#else + // enforcement to use array of textures + size_t main_textures_count = request->NumFrameSuggested; + + // enforcement to do not use texture array as subresources as part of a single texture + desc.ArraySize = 1; +#endif // create GPU textures HRESULT err = S_OK; @@ -407,6 +433,8 @@ mfxStatus VPLDX11AccelerationPolicy::on_free(mfxFrameAllocResponse *response) { } allocation_table.erase(table_it); + GAPI_LOG_DEBUG(nullptr, "Allocation by requested id: " << response->AllocId << + " has been erased"); return MFX_ERR_NONE; } } // namespace onevpl diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp index 893698eb36..61513a45af 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_dx11.hpp @@ -49,7 +49,7 @@ struct GAPI_EXPORTS VPLDX11AccelerationPolicy final: public VPLAccelerationPolic size_t get_surface_count(pool_key_t key) const override; cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) override; + const FrameConstructorArgs ¶ms) override; private: ID3D11Device *hw_handle; ID3D11DeviceContext* device_context; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp index b1d7c25bb1..c955b7529a 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/accel_policy_interface.hpp @@ -16,13 +16,12 @@ #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" namespace cv { namespace gapi { namespace wip { namespace onevpl { - -class Surface; struct VPLAccelerationPolicy { using device_selector_ptr_t = std::shared_ptr; @@ -40,6 +39,11 @@ struct VPLAccelerationPolicy size_t out_buf_ptr_offset, size_t out_buf_ptr_size)>; + struct FrameConstructorArgs { + surface_t::handle_t *assoc_surface; + session_t assoc_handle; + }; + device_selector_ptr_t get_device_selector() { return device_selector; } @@ -50,7 +54,7 @@ struct VPLAccelerationPolicy virtual void init(session_t session) = 0; virtual void deinit(session_t session) = 0; - // Limitation: cannot give guarantee in succesful memory realloccation + // Limitation: cannot give guarantee in successful memory realloccation // for existing workspace in existing pool (see realloc) // thus it is not implemented, // PLEASE provide initial memory area large enough @@ -61,8 +65,7 @@ struct VPLAccelerationPolicy virtual size_t get_surface_count(pool_key_t key) const = 0; virtual cv::MediaFrame::AdapterPtr create_frame_adapter(pool_key_t key, - mfxFrameSurface1* surface) = 0; -private: + const FrameConstructorArgs ¶ms) = 0; device_selector_ptr_t device_selector; }; } // namespace onevpl diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp new file mode 100644 index 0000000000..76da3dbe50 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.cpp @@ -0,0 +1,77 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "logger.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +BaseFrameAdapter::BaseFrameAdapter(std::shared_ptr surface, + SessionHandle assoc_handle, + AccelType accel): + parent_surface_ptr(surface), parent_handle(assoc_handle), + acceleration_type(accel) { + GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); + GAPI_Assert(parent_handle && "mfxSession is nullptr"); + + const Surface::info_t& info = parent_surface_ptr->get_info(); + GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << + ", w: " << info.Width << ", h: " << info.Height << + ", p: " << parent_surface_ptr->get_data().Pitch << + ", frame id: " << reinterpret_cast(this)); + switch(info.FourCC) { + case MFX_FOURCC_I420: + throw std::runtime_error("MediaFrame doesn't support I420 type"); + break; + case MFX_FOURCC_NV12: + frame_desc.fmt = MediaFormat::NV12; + break; + default: + throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); + } + + frame_desc.size = cv::Size{info.Width, info.Height}; + parent_surface_ptr->obtain_lock(); +} + +BaseFrameAdapter::~BaseFrameAdapter() { + // Each BaseFrameAdapter releases mfx surface counter + // The last BaseFrameAdapter releases shared Surface pointer + // The last surface pointer releases workspace memory + GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast(this)); + parent_surface_ptr->release_lock(); +} + +const std::shared_ptr& BaseFrameAdapter::get_surface() const { + return parent_surface_ptr; +} + +std::shared_ptr BaseFrameAdapter::surface() { + return parent_surface_ptr; +} + +const BaseFrameAdapter::SessionHandle BaseFrameAdapter::get_session_handle() const { + return parent_handle; +} + +cv::GFrameDesc BaseFrameAdapter::meta() const { + return frame_desc; +} +AccelType BaseFrameAdapter::accel_type() const { + return acceleration_type; +} + +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp new file mode 100644 index 0000000000..a3dfcf542f --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/base_frame_adapter.hpp @@ -0,0 +1,47 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP +#define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP +#include + +#include +#include +#include "streaming/onevpl/accelerators/surface/surface.hpp" + +#ifdef HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +class BaseFrameAdapter : public cv::MediaFrame::IAdapter { +public: + using SessionHandle = mfxSession; + + const std::shared_ptr& get_surface() const; + const SessionHandle get_session_handle() const; + + cv::GFrameDesc meta() const override; + AccelType accel_type() const; +protected: + BaseFrameAdapter(std::shared_ptr assoc_surface, SessionHandle assoc_handle, + AccelType accel); + ~BaseFrameAdapter(); + std::shared_ptr surface(); + + std::shared_ptr parent_surface_ptr; + SessionHandle parent_handle; + GFrameDesc frame_desc; + AccelType acceleration_type; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_BASE_FRAME_ADAPTER_HPP diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp index 39094c9bc3..751ed7abbd 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.cpp @@ -16,46 +16,16 @@ namespace gapi { namespace wip { namespace onevpl { -VPLMediaFrameCPUAdapter::VPLMediaFrameCPUAdapter(std::shared_ptr surface): - parent_surface_ptr(surface) { - - GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); - GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << - ", w: " << parent_surface_ptr->get_info().Width << - ", h: " << parent_surface_ptr->get_info().Height << - ", p: " << parent_surface_ptr->get_data().Pitch); - const Surface::info_t& info = parent_surface_ptr->get_info(); - switch(info.FourCC) - { - case MFX_FOURCC_I420: - throw std::runtime_error("MediaFrame doesn't support I420 type"); - break; - case MFX_FOURCC_NV12: - frame_desc.fmt = MediaFormat::NV12; - break; - default: - throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); - } - - frame_desc.size = cv::Size{info.Width, info.Height}; - parent_surface_ptr->obtain_lock(); +VPLMediaFrameCPUAdapter::VPLMediaFrameCPUAdapter(std::shared_ptr surface, + SessionHandle assoc_handle): + BaseFrameAdapter(surface, assoc_handle, AccelType::HOST) { } -VPLMediaFrameCPUAdapter::~VPLMediaFrameCPUAdapter() { - - // Each VPLMediaFrameCPUAdapter releases mfx surface counter - // The last VPLMediaFrameCPUAdapter releases shared Surface pointer - // The last surface pointer releases workspace memory - parent_surface_ptr->release_lock(); -} - -cv::GFrameDesc VPLMediaFrameCPUAdapter::meta() const { - return frame_desc; -} +VPLMediaFrameCPUAdapter::~VPLMediaFrameCPUAdapter() = default; MediaFrame::View VPLMediaFrameCPUAdapter::access(MediaFrame::Access) { - const Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + const Surface::data_t& data = get_surface()->get_data(); + const Surface::info_t& info = get_surface()->get_info(); using stride_t = typename cv::MediaFrame::View::Strides::value_type; stride_t pitch = static_cast(data.Pitch); diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp index 1c51ad7473..849c3a2775 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp @@ -6,10 +6,8 @@ #ifndef GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_CPU_FRAME_ADAPTER_HPP #define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_CPU_FRAME_ADAPTER_HPP -#include -#include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" #ifdef HAVE_ONEVPL @@ -18,22 +16,20 @@ namespace gapi { namespace wip { namespace onevpl { -class Surface; -class VPLMediaFrameCPUAdapter : public cv::MediaFrame::IAdapter { +class VPLMediaFrameCPUAdapter : public BaseFrameAdapter { public: // GAPI_EXPORTS for tests - GAPI_EXPORTS explicit VPLMediaFrameCPUAdapter(std::shared_ptr assoc_surface); + GAPI_EXPORTS explicit VPLMediaFrameCPUAdapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle); GAPI_EXPORTS ~VPLMediaFrameCPUAdapter(); - cv::GFrameDesc meta() const override; + MediaFrame::View access(MediaFrame::Access) override; // The default implementation does nothing cv::util::any blobParams() const override; void serialize(cv::gapi::s11n::IOStream&) override; void deserialize(cv::gapi::s11n::IIStream&) override; -private: - std::shared_ptr parent_surface_ptr; - GFrameDesc frame_desc; + }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp index 6afa2cf0b6..885fa1589a 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.cpp @@ -40,117 +40,71 @@ void unlock_mid(mfxMemId mid, mfxFrameData &data, MediaFrame::Access mode) { } } -VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr surface): - parent_surface_ptr(surface) { - GAPI_Assert(parent_surface_ptr && "Surface is nullptr"); - - const Surface::info_t& info = parent_surface_ptr->get_info(); - Surface::data_t& data = parent_surface_ptr->get_data(); - GAPI_LOG_DEBUG(nullptr, "surface: " << parent_surface_ptr->get_handle() << - ", w: " << info.Width << ", h: " << info.Height << - ", p: " << data.Pitch << - ", frame id: " << reinterpret_cast(this)); - switch(info.FourCC) - { - case MFX_FOURCC_I420: - throw std::runtime_error("MediaFrame doesn't support I420 type"); - break; - case MFX_FOURCC_NV12: - frame_desc.fmt = MediaFormat::NV12; - break; - default: - throw std::runtime_error("MediaFrame unknown 'fmt' type: " + std::to_string(info.FourCC)); - } - frame_desc.size = cv::Size{info.Width, info.Height}; +VPLMediaFrameDX11Adapter::VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle): + BaseFrameAdapter(assoc_surface, assoc_handle, AccelType::DX11) { + Surface::data_t& data = assoc_surface->get_data(); LockAdapter* alloc_data = reinterpret_cast(data.MemId); alloc_data->set_adaptee(this); - - parent_surface_ptr->obtain_lock(); } VPLMediaFrameDX11Adapter::~VPLMediaFrameDX11Adapter() { - // Each VPLMediaFrameDX11Adapter releases mfx surface counter - // The last VPLMediaFrameDX11Adapter releases shared Surface pointer - // The last surface pointer releases workspace memory - - GAPI_LOG_DEBUG(nullptr, "destroy frame id: " << reinterpret_cast(this)); - - Surface::data_t& data = parent_surface_ptr->get_data(); + Surface::data_t& data = surface()->get_data(); LockAdapter* alloc_data = reinterpret_cast(data.MemId); alloc_data->set_adaptee(nullptr); - - parent_surface_ptr->release_lock(); -} - -cv::GFrameDesc VPLMediaFrameDX11Adapter::meta() const { - return frame_desc; } MediaFrame::View VPLMediaFrameDX11Adapter::access(MediaFrame::Access mode) { - Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + // NB: make copy for some copyable object, because access release may be happened + // after source/pool destruction, so we need a copy + auto surface_ptr_copy = surface(); + Surface::data_t& data = surface_ptr_copy->get_data(); + const Surface::info_t& info = surface_ptr_copy->get_info(); void* frame_id = reinterpret_cast(this); - GAPI_LOG_DEBUG(nullptr, "START lock frame in surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_DEBUG(nullptr, "START lock frame in surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); // lock MT lock_mid(data.MemId, data, mode); - GAPI_LOG_DEBUG(nullptr, "FINISH lock frame in surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_DEBUG(nullptr, "FINISH lock frame in surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); using stride_t = typename cv::MediaFrame::View::Strides::value_type; stride_t pitch = static_cast(data.Pitch); - // NB: make copy for some copyable object, because access release may be happened - // after source/pool destruction, so we need a copy - auto parent_surface_ptr_copy = parent_surface_ptr; + auto release_guard = [surface_ptr_copy, frame_id, mode] () { + surface_ptr_copy->obtain_lock(); + + auto& data = surface_ptr_copy->get_data(); + GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + unlock_mid(data.MemId, data, mode); + + GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << surface_ptr_copy->get_handle() << + ", frame id: " << frame_id); + surface_ptr_copy->release_lock(); + }; + switch(info.FourCC) { case MFX_FOURCC_I420: { GAPI_Assert(data.Y && data.U && data.V && "MFX_FOURCC_I420 frame data is nullptr"); cv::MediaFrame::View::Ptrs pp = { data.Y, data.U, data.V, nullptr }; cv::MediaFrame::View::Strides ss = { pitch, pitch / 2, pitch / 2, 0u }; - return cv::MediaFrame::View(std::move(pp), std::move(ss), - [parent_surface_ptr_copy, - frame_id, mode] () { - parent_surface_ptr_copy->obtain_lock(); - - auto& data = parent_surface_ptr_copy->get_data(); - GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - unlock_mid(data.MemId, data, mode); - - GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - - parent_surface_ptr_copy->release_lock(); - }); + return cv::MediaFrame::View(std::move(pp), std::move(ss), release_guard); } case MFX_FOURCC_NV12: { if (!data.Y || !data.UV) { - GAPI_LOG_WARNING(nullptr, "Empty data detected!!! for surface: " << parent_surface_ptr->get_handle() << + GAPI_LOG_WARNING(nullptr, "Empty data detected!!! for surface: " << surface_ptr_copy->get_handle() << ", frame id: " << frame_id); } GAPI_Assert(data.Y && data.UV && "MFX_FOURCC_NV12 frame data is nullptr"); cv::MediaFrame::View::Ptrs pp = { data.Y, data.UV, nullptr, nullptr }; cv::MediaFrame::View::Strides ss = { pitch, pitch, 0u, 0u }; - return cv::MediaFrame::View(std::move(pp), std::move(ss), - [parent_surface_ptr_copy, - frame_id, mode] () { - parent_surface_ptr_copy->obtain_lock(); - - auto& data = parent_surface_ptr_copy->get_data(); - GAPI_LOG_DEBUG(nullptr, "START unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - unlock_mid(data.MemId, data, mode); - - GAPI_LOG_DEBUG(nullptr, "FINISH unlock frame in surface: " << parent_surface_ptr_copy->get_handle() << - ", frame id: " << frame_id); - parent_surface_ptr_copy->release_lock(); - }); + return cv::MediaFrame::View(std::move(pp), std::move(ss), release_guard); } break; default: @@ -162,8 +116,9 @@ cv::util::any VPLMediaFrameDX11Adapter::blobParams() const { /*GAPI_Assert(false && "VPLMediaFrameDX11Adapter::blobParams() is not fully integrated" "in OpenVINO InferenceEngine and would be temporary disable.");*/ #ifdef HAVE_INF_ENGINE - Surface::data_t& data = parent_surface_ptr->get_data(); - const Surface::info_t& info = parent_surface_ptr->get_info(); + auto surface_ptr_copy = get_surface(); + Surface::data_t& data = surface_ptr_copy->get_data(); + const Surface::info_t& info = surface_ptr_copy->get_info(); NativeHandleAdapter* native_handle_getter = reinterpret_cast(data.MemId); mfxHDLPair handle{}; diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp index ca6602353b..39528ca6a5 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp @@ -8,9 +8,7 @@ #define GAPI_STREAMING_ONEVPL_ACCELERATORS_SURFACE_DX11_FRAME_ADAPTER_HPP #include -#include -#include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS - +#include "streaming/onevpl/accelerators/surface/base_frame_adapter.hpp" #include "streaming/onevpl/accelerators/utils/shared_lock.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -30,15 +28,13 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -class Surface; -class VPLMediaFrameDX11Adapter final: public cv::MediaFrame::IAdapter, +class VPLMediaFrameDX11Adapter final: public BaseFrameAdapter, public SharedLock { public: // GAPI_EXPORTS for tests - GAPI_EXPORTS VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface); + GAPI_EXPORTS VPLMediaFrameDX11Adapter(std::shared_ptr assoc_surface, + SessionHandle assoc_handle); GAPI_EXPORTS ~VPLMediaFrameDX11Adapter(); - cv::GFrameDesc meta() const override; MediaFrame::View access(MediaFrame::Access) override; // The default implementation does nothing @@ -48,9 +44,7 @@ public: static DXGI_FORMAT get_dx11_color_format(uint32_t mfx_fourcc); private: - std::shared_ptr parent_surface_ptr; mfxFrameAllocator allocator; - GFrameDesc frame_desc; }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp index 4f93312e24..68d7a697e7 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/surface/surface.hpp @@ -59,7 +59,7 @@ public: /** * Extract value thread-safe lock counter (see @ref Surface description). * It's usual situation that counter may be instantly decreased in other thread after this method called. - * We need instantaneous value. This method syncronized in inter-threading way with @ref Surface::release_lock() + * We need instantaneous value. This method synchronized in inter-threading way with @ref Surface::release_lock() * * @return fetched locks count. */ diff --git a/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp index 827392f8be..b91554f435 100644 --- a/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp +++ b/modules/gapi/src/streaming/onevpl/accelerators/utils/elastic_barrier.hpp @@ -163,7 +163,7 @@ private: * deinitialization called off in `on_unlock` * because new `incoming` request had appeared at here before * `on_unlock` started deinit procedure in another thread. - * So no reinit required because no deinit had happended + * So no reinit required because no deinit had happened * * main `busy-wait` request must break busy-wait state * and become `outgoing` request. diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp index a4d85f2598..b3beb71fb1 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.cpp @@ -38,6 +38,20 @@ namespace gapi { namespace wip { namespace onevpl { +std::vector insertCfgparam(std::vector &¶m_array, AccelType type) { + switch (type) { + case AccelType::HOST: + break; + case AccelType::DX11: + param_array.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + break; + default: + GAPI_DbgAssert(false && "Unexpected AccelType"); + break; + } + return std::move(param_array); +} + CfgParamDeviceSelector::CfgParamDeviceSelector(const CfgParams& cfg_params) : suggested_device(IDeviceSelector::create(nullptr, "CPU", AccelType::HOST)), suggested_context(IDeviceSelector::create(nullptr, AccelType::HOST)) { @@ -231,6 +245,52 @@ CfgParamDeviceSelector::CfgParamDeviceSelector(Device::Ptr device_ptr, } } +CfgParamDeviceSelector::CfgParamDeviceSelector(const Device &device, + const Context &ctx, + CfgParams) : + suggested_device(device), + suggested_context(ctx) { + + switch(device.get_type()) { + case AccelType::DX11: { +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 + ID3D11Device* dx_device_ptr = + reinterpret_cast(suggested_device.get_ptr()); + dx_device_ptr->AddRef(); + + ID3D11DeviceContext* dx_ctx_ptr = + reinterpret_cast(suggested_context.get_ptr()); + + // oneVPL recommendation + { + ID3D11Multithread *pD11Multithread = nullptr; + dx_ctx_ptr->QueryInterface(IID_PPV_ARGS(&pD11Multithread)); + pD11Multithread->SetMultithreadProtected(true); + pD11Multithread->Release(); + } + + dx_ctx_ptr->AddRef(); + break; +#else + GAPI_LOG_WARNING(nullptr, "Unavailable \"" << CfgParam::acceleration_mode_name() << + ": MFX_ACCEL_MODE_VIA_D3D11\"" + "was chosen for current project configuration"); + throw std::logic_error(std::string("Unsupported \"") + + CfgParam::acceleration_mode_name() + ": MFX_ACCEL_MODE_VIA_D3D11\""); +#endif // HAVE_DIRECTX +#endif // HAVE_D3D11 + } + case AccelType::HOST: + break; + default: + throw std::logic_error(std::string("Unsupported \"") + CfgParam::acceleration_mode_name() + + "\" requested: " + + to_cstring(device.get_type())); + break; + } +} + CfgParamDeviceSelector::~CfgParamDeviceSelector() { GAPI_LOG_INFO(nullptr, "release context: " << suggested_context.get_ptr()); AccelType ctype = suggested_context.get_type(); diff --git a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp index 2a55fb09cf..5dae1c508d 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp +++ b/modules/gapi/src/streaming/onevpl/cfg_param_device_selector.hpp @@ -7,14 +7,14 @@ #ifndef GAPI_STREAMING_ONEVPL_CFG_PARAM_DEVICE_SELECTOR_HPP #define GAPI_STREAMING_ONEVPL_CFG_PARAM_DEVICE_SELECTOR_HPP -#ifdef HAVE_ONEVPL - #include #include #include #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS +#ifdef HAVE_ONEVPL + namespace cv { namespace gapi { namespace wip { @@ -26,6 +26,9 @@ struct GAPI_EXPORTS CfgParamDeviceSelector final: public IDeviceSelector { const std::string& device_id, Context::Ptr ctx_ptr, const CfgParams& params); + CfgParamDeviceSelector(const Device &device_ptr, + const Context &ctx_ptr, + CfgParams params); ~CfgParamDeviceSelector(); DeviceScoreTable select_devices() const override; diff --git a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp index d748825b1c..afb92f4443 100644 --- a/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp +++ b/modules/gapi/src/streaming/onevpl/cfg_params_parser.cpp @@ -94,75 +94,22 @@ std::vector get_params_from_string(const std::string& str) { ret.push_back(creator.create(name, cstr_to_mfx_accel_mode(value.c_str()))); } else if (name == "mfxImplDescription.ApiVersion.Version") { ret.push_back(creator.create(name, cstr_to_mfx_version(value.c_str()))); - } else if (name == CfgParam::frames_pool_size_name()) { + } else if ((name == CfgParam::frames_pool_size_name()) || (name == CfgParam::vpp_frames_pool_size_name())) { ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); - } else if (name == CfgParam::vpp_frames_pool_size_name()) { - ret.push_back(creator.create(name, strtoull_or_throw(value.c_str()), false)); - } else if (name == CfgParam::vpp_in_width_name()) { + } else if ((name == CfgParam::vpp_in_width_name()) || (name == CfgParam::vpp_in_height_name()) || + (name == CfgParam::vpp_in_crop_w_name()) || (name == CfgParam::vpp_in_crop_h_name()) || + (name == CfgParam::vpp_in_crop_x_name()) || (name == CfgParam::vpp_in_crop_y_name()) || + (name == CfgParam::vpp_out_chroma_format_name()) || + (name == CfgParam::vpp_out_width_name()) || (name == CfgParam::vpp_out_height_name()) || + (name == CfgParam::vpp_out_crop_w_name()) || (name == CfgParam::vpp_out_crop_h_name()) || + (name == CfgParam::vpp_out_crop_x_name()) || (name == CfgParam::vpp_out_crop_y_name()) || + (name == CfgParam::vpp_out_pic_struct_name())) { ret.push_back(creator.create(name, static_cast(strtoul_or_throw(value.c_str())), false)); - } else if (name == CfgParam::vpp_in_height_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_w_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_h_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_x_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_in_crop_y_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_fourcc_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_chroma_format_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_width_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_height_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_w_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_h_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_x_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_crop_y_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_pic_struct_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_framerate_n_name()) { - ret.push_back(creator.create(name, - static_cast(strtoul_or_throw(value.c_str())), - false)); - } else if (name == CfgParam::vpp_out_framerate_d_name()) { + } else if ((name == CfgParam::vpp_out_fourcc_name()) || + (name == CfgParam::vpp_out_framerate_n_name()) || + (name == CfgParam::vpp_out_framerate_d_name())) { ret.push_back(creator.create(name, static_cast(strtoul_or_throw(value.c_str())), false)); diff --git a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp index 1ac88bd807..404b2f3872 100644 --- a/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp +++ b/modules/gapi/src/streaming/onevpl/device_selector_interface.cpp @@ -81,6 +81,42 @@ IDeviceSelector::Score::Type IDeviceSelector::Score::get() const { IDeviceSelector::~IDeviceSelector() { } +namespace detail +{ +struct DeviceContextCreator : public IDeviceSelector { + DeviceScoreTable select_devices() const override { return {};} + DeviceContexts select_context() override { return {};} + + template + static Entity create_entity(Args &&...args) { + return IDeviceSelector::create(std::forward(args)...); + } +}; +} + +Device create_host_device() { + return detail::DeviceContextCreator::create_entity(nullptr, + "CPU", + AccelType::HOST); +} + +Context create_host_context() { + return detail::DeviceContextCreator::create_entity(nullptr, + AccelType::HOST); +} + +Device create_dx11_device(Device::Ptr device_ptr, + const std::string& device_name) { + return detail::DeviceContextCreator::create_entity(device_ptr, + device_name, + AccelType::DX11); +} + +Context create_dx11_context(Context::Ptr ctx_ptr) { + return detail::DeviceContextCreator::create_entity(ctx_ptr, + AccelType::DX11); +} + } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp index d8af94f939..b80ad2e12e 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.cpp @@ -44,7 +44,7 @@ void VPLLegacyDecodeEngine::try_modify_pool_size_request_param(const char* param param_name + ", overflow"); } request.NumFrameSuggested = static_cast(new_frames_count); - GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overriden by user input: " << + GAPI_LOG_DEBUG(nullptr, "mfxFrameAllocRequest overridden by user input: " << ", mfxFrameAllocRequest.NumFrameMin: " << request.NumFrameMin << ", mfxFrameAllocRequest.NumFrameSuggested: " << request.NumFrameSuggested << ", mfxFrameAllocRequest.Type: " << request.Type); @@ -83,11 +83,8 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptrDataLength)) - ? my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -98,12 +95,12 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptrget_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -155,7 +152,7 @@ VPLLegacyDecodeEngine::VPLLegacyDecodeEngine(std::unique_ptr ExecutionStatus { return this->process_error(sess.last_status, static_cast(sess)); @@ -180,7 +177,7 @@ VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param mfxVideoParam mfxDecParams {}; mfxDecParams.mfx.CodecId = decoder_id_name; - // set memory stream direction accroding to accelearion policy device type + // set memory stream direction according to acceleration policy device type IDeviceSelector::DeviceScoreTable devices = acceleration_policy->get_device_selector()->select_devices(); GAPI_Assert(devices.size() == 1 && "Multiple(or zero) acceleration devices case is unsupported"); AccelType accel_type = devices.begin()->second.get_type(); @@ -255,7 +252,7 @@ VPLLegacyDecodeEngine::SessionParam VPLLegacyDecodeEngine::prepare_session_param acceleration_policy->create_surface_pool(decRequest, mfxDecParams.mfx.FrameInfo); // Input parameters finished, now initialize decode - // create decoder for session accoring to header recovered from source file + // create decoder for session according to header recovered from source file sts = MFXVideoDECODE_Init(mfx_session, &mfxDecParams); if (MFX_ERR_NONE != sts) { @@ -282,22 +279,19 @@ VPLLegacyDecodeEngine::initialize_session(mfxSession mfx_session, sess_ptr->init_surface_pool(param.decode_pool_key); // prepare working decode surface - sess_ptr->swap_surface(*this); + sess_ptr->swap_decode_surface(*this); return sess_ptr; } -ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::execute_op(operation_t& op, EngineSession& sess) { - return op(sess); -} - void VPLLegacyDecodeEngine::on_frame_ready(LegacyDecodeSession& sess, mfxFrameSurface1* ready_surface) { GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready"); // manage memory ownership rely on acceleration policy + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; auto frame_adapter = acceleration_policy->create_frame_adapter(sess.decoder_pool_id, - ready_surface); + args); ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); // pop away synced out object @@ -313,7 +307,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt { // prepare sync object for new surface try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); @@ -334,7 +328,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt // This applies to external memory allocations and should not be expected for // a simple internal allocation case like this try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); @@ -358,9 +352,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt // The decoder detected a new sequence header in the bitstream. // Video parameters may have changed. // In external memory allocation case, might need to reallocate the output surface - /*GAPI_DbgAssert(false && "VPLLegacyDecodeEngine::process_error - " - "MFX_WRN_VIDEO_PARAM_CHANGED is not processed"); - */ + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_VIDEO_PARAM_CHANGED"); return ExecutionStatus::Continue; break; case MFX_ERR_INCOMPATIBLE_VIDEO_PARAM: @@ -380,7 +372,7 @@ ProcessingEngineBase::ExecutionStatus VPLLegacyDecodeEngine::process_error(mfxSt break; case MFX_WRN_IN_EXECUTION: try { - sess.swap_surface(*this); + sess.swap_decode_surface(*this); return ExecutionStatus::Continue; } catch (const std::runtime_error& ex) { GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp index 1b7bee6a82..5b4142b693 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_engine_legacy.hpp @@ -41,7 +41,6 @@ protected: const std::vector& cfg_params, std::shared_ptr provider); - ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; ExecutionStatus process_error(mfxStatus status, LegacyDecodeSession& sess); void on_frame_ready(LegacyDecodeSession& sess, diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp index 56e51ffd9f..4d522e6db6 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.cpp @@ -22,10 +22,11 @@ namespace onevpl { LegacyDecodeSession::LegacyDecodeSession(mfxSession sess, DecoderParams&& decoder_param, std::shared_ptr provider) : - EngineSession(sess, std::move(decoder_param.stream)), + EngineSession(sess), mfx_decoder_param(std::move(decoder_param.param)), data_provider(std::move(provider)), - procesing_surface_ptr(), + stream(std::move(decoder_param.stream)), + processing_surface_ptr(), sync_queue(), decoded_frames_count() { @@ -37,25 +38,10 @@ LegacyDecodeSession::~LegacyDecodeSession() MFXVideoDECODE_Close(session); } -void LegacyDecodeSession::swap_surface(VPLLegacyDecodeEngine& engine) { +void LegacyDecodeSession::swap_decode_surface(VPLLegacyDecodeEngine& engine) { VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); - try { - auto cand = acceleration_policy->get_free_surface(decoder_pool_id).lock(); - - GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" - ", old: " << (!procesing_surface_ptr.expired() - ? procesing_surface_ptr.lock()->get_handle() - : nullptr) << - ", new: "<< cand->get_handle()); - - procesing_surface_ptr = cand; - } catch (const std::runtime_error& ex) { - GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); - - // Delegate exception processing on caller - throw; - } + request_free_surface(session, decoder_pool_id, *acceleration_policy, processing_surface_ptr); } void LegacyDecodeSession::init_surface_pool(VPLAccelerationPolicy::pool_key_t key) { @@ -77,6 +63,11 @@ Data::Meta LegacyDecodeSession::generate_frame_meta() { const mfxFrameInfo& LegacyDecodeSession::get_video_param() const { return mfx_decoder_param.mfx.FrameInfo; } + +IDataProvider::mfx_bitstream *LegacyDecodeSession::get_mfx_bitstream_ptr() { + return (data_provider || (stream && stream->DataLength)) ? + stream.get() : nullptr; +} } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp index 356f9851cd..e87ce6cb65 100644 --- a/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/decode/decode_session.hpp @@ -13,7 +13,6 @@ #include #include "streaming/onevpl/engine/engine_session.hpp" -#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -21,11 +20,7 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -struct IDataProvider; class Surface; -struct VPLAccelerationPolicy; - class GAPI_EXPORTS LegacyDecodeSession : public EngineSession { public: friend class VPLLegacyDecodeEngine; @@ -35,19 +30,22 @@ public: ~LegacyDecodeSession(); using EngineSession::EngineSession; - void swap_surface(VPLLegacyDecodeEngine& engine); + void swap_decode_surface(VPLLegacyDecodeEngine& engine); void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); Data::Meta generate_frame_meta(); virtual const mfxFrameInfo& get_video_param() const override; + + IDataProvider::mfx_bitstream *get_mfx_bitstream_ptr(); private: mfxVideoParam mfx_decoder_param; - std::shared_ptr data_provider; VPLAccelerationPolicy::pool_key_t decoder_pool_id; - mfxFrameAllocRequest request; + + std::shared_ptr data_provider; + std::shared_ptr stream; protected: - std::weak_ptr procesing_surface_ptr; + std::weak_ptr processing_surface_ptr; using op_handle_t = std::pair; std::queue sync_queue; diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp index 4915b51e34..9a2b812db3 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.cpp @@ -14,8 +14,10 @@ namespace gapi { namespace wip { namespace onevpl { -EngineSession::EngineSession(mfxSession sess, std::shared_ptr&& str) : - session(sess), stream(std::move(str)) {} +EngineSession::EngineSession(mfxSession sess) : + session(sess) { +} + EngineSession::~EngineSession() { GAPI_LOG_INFO(nullptr, "Close session: " << session); @@ -26,6 +28,31 @@ std::string EngineSession::error_code_to_str() const { return mfxstatus_to_string(last_status); } + +void EngineSession::request_free_surface(mfxSession session, + VPLAccelerationPolicy::pool_key_t key, + VPLAccelerationPolicy &acceleration_policy, + std::weak_ptr &surface_to_exchange, + bool reset_if_not_found) { + try { + auto cand = acceleration_policy.get_free_surface(key).lock(); + + GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" + ", old: " << (!surface_to_exchange.expired() + ? surface_to_exchange.lock()->get_handle() + : nullptr) << + ", new: "<< cand->get_handle()); + + surface_to_exchange = cand; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); + if (reset_if_not_found) { + surface_to_exchange.reset(); + } + // Delegate exception processing on caller side + throw; + } +} } // namespace onevpl } // namespace wip } // namespace gapi diff --git a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp index 8a1f4383eb..d38dad3812 100644 --- a/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/engine_session.hpp @@ -16,6 +16,8 @@ #include "opencv2/gapi/own/exports.hpp" // GAPI_EXPORTS #include +#include "streaming/onevpl/data_provider_defines.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" #ifdef HAVE_ONEVPL #include "streaming/onevpl/onevpl_export.hpp" @@ -38,15 +40,19 @@ struct GAPI_EXPORTS TranscoderParams { struct GAPI_EXPORTS EngineSession { mfxSession session; - std::shared_ptr stream; - mfxSyncPoint sync; mfxStatus last_status; - EngineSession(mfxSession sess, std::shared_ptr&& str); + EngineSession(mfxSession sess); std::string error_code_to_str() const; virtual ~EngineSession(); virtual const mfxFrameInfo& get_video_param() const = 0; + + static void request_free_surface(mfxSession session, + VPLAccelerationPolicy::pool_key_t key, + VPLAccelerationPolicy &acceleration_policy, + std::weak_ptr &surface_to_exchange, + bool reset_if_not_found = false); }; } // namespace onevpl } // namespace wip diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp new file mode 100644 index 0000000000..5a08f2bd09 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.cpp @@ -0,0 +1,105 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include +#include + +#include +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" +#endif // HAVE_ONEVPL + +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +#ifdef HAVE_ONEVPL +cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::MediaFrame& in_frame) { + cv::util::optional param; + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + bool worker_found = false; + for (const auto &w : workers) { + param = w->is_applicable(in_frame); + if (param.has_value()) { + auto &vpp_param = param.value().get(); + BaseFrameAdapter* adapter = reinterpret_cast(vpp_param.reserved); + const IDeviceSelector::DeviceScoreTable &devs = + (std::static_pointer_cast(w))->get_accel()->get_device_selector()->select_devices(); + GAPI_DbgAssert(devs.size() >= 1 && "Invalid device selector"); + auto worker_accel_type = std::get<1>(*devs.begin()).get_type(); + GAPI_LOG_DEBUG(nullptr, "acceleration types for frame: " << to_cstring(adapter->accel_type()) << + ", for worker: " << to_cstring(worker_accel_type)); + if (worker_accel_type == adapter->accel_type()){ + vpp_param.reserved = reinterpret_cast(w.get()); + GAPI_LOG_DEBUG(nullptr, "selected worker: " << vpp_param.reserved); + worker_found = true; + break; + } + } + } + return worker_found ? param : cv::util::optional{}; +} + +pp_session VPPPreprocDispatcher::initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) { + const auto &vpp_param = initial_frame_param.get(); + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + for (auto &w : workers) { + if (reinterpret_cast(w.get()) == vpp_param.reserved) { + pp_session sess = w->initialize_preproc(initial_frame_param, required_frame_descr); + vpp_pp_session &vpp_sess = sess.get(); + vpp_sess.reserved = reinterpret_cast(w.get()); + GAPI_LOG_DEBUG(nullptr, "initialized session preproc for worker: " << vpp_sess.reserved); + return sess; + } + } + GAPI_Assert(false && "Cannot initialize VPP preproc in dispatcher, no suitable worker"); +} + +cv::MediaFrame VPPPreprocDispatcher::run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) { + const auto &vpp_sess = session_handle.get(); + GAPI_LOG_DEBUG(nullptr, "workers: " << workers.size()); + for (auto &w : workers) { + if (reinterpret_cast(w.get()) == vpp_sess.reserved) { + GAPI_LOG_DEBUG(nullptr, "trigger execution on worker: " << vpp_sess.reserved); + return w->run_sync(session_handle, in_frame, opt_roi); + } + } + GAPI_Assert(false && "Cannot invoke VPP preproc in dispatcher, no suitable worker"); +} + +#else // HAVE_ONEVPL +cv::util::optional VPPPreprocDispatcher::is_applicable(const cv::MediaFrame&) { + return cv::util::optional{}; +} + +pp_session VPPPreprocDispatcher::initialize_preproc(const pp_params&, + const GFrameDesc&) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} + +cv::MediaFrame VPPPreprocDispatcher::run_sync(const pp_session &, + const cv::MediaFrame&, + const cv::util::optional &) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} +#endif // HAVE_ONEVPL +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp new file mode 100644 index 0000000000..ea808bd542 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_dispatcher.hpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP +#define GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP + +#include +#include + +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +// GAPI_EXPORTS for tests +class GAPI_EXPORTS VPPPreprocDispatcher final : public cv::gapi::wip::IPreprocEngine { +public: + + cv::util::optional is_applicable(const cv::MediaFrame& in_frame) override; + + pp_session initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) override; + + cv::MediaFrame run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) override; + + template + void insert_worker(Args&& ...args) { + workers.emplace_back(std::make_shared(std::forward(args)...)); + } + + size_t size() const { + return workers.size(); + } +private: + std::vector> workers; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // GAPI_STREAMING_ONEVPL_PREPROC_DISPATCHER_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp new file mode 100644 index 0000000000..2e9a9bebce --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.cpp @@ -0,0 +1,472 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include + +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { + +bool FrameInfoComparator::operator()(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) const { + return lhs < rhs; +} + +bool FrameInfoComparator::equal_to(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) { + return lhs == rhs; +} + +void apply_roi(mfxFrameSurface1* surface_handle, + const cv::util::optional &opt_roi) { + if (opt_roi.has_value()) { + const cv::Rect &roi = opt_roi.value(); + surface_handle->Info.CropX = static_cast(roi.x); + surface_handle->Info.CropY = static_cast(roi.y); + surface_handle->Info.CropW = static_cast(roi.width); + surface_handle->Info.CropH = static_cast(roi.height); + GAPI_LOG_DEBUG(nullptr, "applied ROI {" << surface_handle->Info.CropX << + ", " << surface_handle->Info.CropY << "}, " + "{ " << surface_handle->Info.CropX + surface_handle->Info.CropW << + ", " << surface_handle->Info.CropY + surface_handle->Info.CropH << "}"); + } +} + +VPPPreprocEngine::VPPPreprocEngine(std::unique_ptr&& accel) : + ProcessingEngineBase(std::move(accel)) { + GAPI_LOG_DEBUG(nullptr, "Create VPP preprocessing engine"); + preprocessed_frames_count = 0; + create_pipeline( + // 0) preproc decoded surface with VPP params + [this] (EngineSession& sess) -> ExecutionStatus + { + session_type &my_sess = static_cast(sess); + while (!my_sess.sync_in_queue.empty()) { + do { + if (!my_sess.processing_surface_ptr.expired()) { + session_type::incoming_task pending_op = my_sess.sync_in_queue.front(); + GAPI_LOG_DEBUG(nullptr, "pending IN operations count: " << + my_sess.sync_in_queue.size() << + ", sync id: " << + pending_op.sync_handle << + ", surface: " << + pending_op.decoded_surface_ptr); + + my_sess.sync_in_queue.pop(); + auto *vpp_suface = my_sess.processing_surface_ptr.lock()->get_handle(); + + apply_roi(pending_op.decoded_surface_ptr, pending_op.roi); + + mfxSyncPoint vpp_sync_handle{}; + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(my_sess.session, + pending_op.decoded_surface_ptr, + vpp_suface, + nullptr, &vpp_sync_handle); + session_type::outgoing_task vpp_pending_op {vpp_sync_handle, + vpp_suface, + std::move(pending_op) }; + GAPI_LOG_DEBUG(nullptr, "Got VPP async operation" << + ", sync id: " << + vpp_pending_op.sync_handle << + ", dec surface: " << + vpp_pending_op.original_surface_ptr << + ", trans surface: " << + vpp_pending_op.vpp_surface_ptr << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + // NB: process status + if (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_ERR_NONE) { + vpp_pending_op.vpp_surface_ptr->Data.Locked++; // TODO -S- workaround + my_sess.vpp_out_queue.emplace(vpp_pending_op); + } + } + + try { + my_sess.swap_surface(*this); + } catch (const std::runtime_error& ex) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. + // But print WARNING to notify user about pipeline stuck + GAPI_LOG_WARNING(nullptr, "[" << my_sess.session << + "] has no VPP surface, reason: " << + ex.what()); + my_sess.processing_surface_ptr.reset(); + break; + } + } while(my_sess.last_status == MFX_ERR_MORE_SURFACE); + + if (my_sess.processing_surface_ptr.expired()) { + // TODO break main loop + break; + } + } + return ExecutionStatus::Continue; + }, + // 1) Wait for ASYNC decode result + [this] (EngineSession& sess) -> ExecutionStatus + { + session_type& my_sess = static_cast(sess); + do { + if (!my_sess.vpp_out_queue.empty()) { // FIFO: check the oldest async operation complete + session_type::outgoing_task& pending_op = my_sess.vpp_out_queue.front(); + sess.last_status = MFXVideoCORE_SyncOperation(sess.session, pending_op.sync_handle, 0); + + GAPI_LOG_DEBUG(nullptr, "pending VPP operations count: " << + my_sess.vpp_out_queue.size() << + ", sync id: " << + pending_op.sync_handle << + ", surface: " << + pending_op.vpp_surface_ptr << + ", status: " << + mfxstatus_to_string(my_sess.last_status)); + + // put frames in ready queue on success + if (MFX_ERR_NONE == sess.last_status) { + pending_op.release_frame(); + on_frame_ready(my_sess, pending_op.vpp_surface_ptr); + } + } + } while (MFX_ERR_NONE == sess.last_status && !my_sess.vpp_out_queue.empty()); + return ExecutionStatus::Continue; + }, + // 2) Falls back on generic status processing + [this] (EngineSession& sess) -> ExecutionStatus + { + return this->process_error(sess.last_status, static_cast(sess)); + } + ); +} + +cv::util::optional VPPPreprocEngine::is_applicable(const cv::MediaFrame& in_frame) { + // TODO consider something smarter than RTI + cv::util::optional ret; + BaseFrameAdapter *vpl_adapter = in_frame.get(); + GAPI_LOG_DEBUG(nullptr, "validate VPP preprocessing is applicable for frame"); + if (vpl_adapter) { + ret = cv::util::make_optional( + pp_params::create(vpl_adapter->get_session_handle(), + vpl_adapter->get_surface()->get_info(), + vpl_adapter)); + GAPI_LOG_DEBUG(nullptr, "VPP preprocessing applicable, session [" << + vpl_adapter->get_session_handle() << "]"); + } + return ret; +} + +pp_session VPPPreprocEngine::initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) { + const vpp_pp_params ¶ms = initial_frame_param.get(); + + // adjust preprocessing settings + mfxVideoParam mfxVPPParams{0}; + // NB: IN params for VPP session must be equal to decoded surface params + mfxVPPParams.vpp.In = params.info; + + // NB: OUT params must refer to IN params of a network + GAPI_LOG_DEBUG(nullptr, "network input size: " << required_frame_descr.size.width << + "x" << required_frame_descr.size.height); + mfxVPPParams.vpp.Out = mfxVPPParams.vpp.In; + switch (required_frame_descr.fmt) { + case MediaFormat::NV12: + mfxVPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; + break; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported MediaFormat in preprocessing: " << + static_cast(required_frame_descr.fmt) << + ". Frame will be rejected"); + throw std::runtime_error("unsupported MediaFormat value in VPP preprocessing"); + } + + mfxVPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + mfxVPPParams.vpp.Out.Width = static_cast(required_frame_descr.size.width); + mfxVPPParams.vpp.Out.Height = static_cast(required_frame_descr.size.height); + mfxVPPParams.vpp.Out.CropW = mfxVPPParams.vpp.Out.Width; + mfxVPPParams.vpp.Out.CropH = mfxVPPParams.vpp.Out.Height; + + // check In & Out equally to bypass preproc + if (mfxVPPParams.vpp.Out == mfxVPPParams.vpp.In) { + GAPI_LOG_DEBUG(nullptr, "no preproc required"); + return pp_session::create(nullptr); + } + + // recalculate size param according to VPP alignment + mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width); + mfxVPPParams.vpp.Out.Height = ALIGN16(mfxVPPParams.vpp.Out.Height); + mfxVPPParams.vpp.Out.CropW = mfxVPPParams.vpp.Out.Width; + mfxVPPParams.vpp.Out.CropH = mfxVPPParams.vpp.Out.Height; + + GAPI_LOG_DEBUG(nullptr, "\nFrom:\n{\n" << mfx_frame_info_to_string(mfxVPPParams.vpp.In) << + "}\nTo:\n{\n" << mfx_frame_info_to_string(mfxVPPParams.vpp.Out) << "}"); + + // find existing session + GAPI_LOG_DEBUG(nullptr, "Find existing VPPPreprocSession for requested frame params" + ", total sessions: " << preproc_session_map.size()); + auto it = preproc_session_map.find(mfxVPPParams.vpp.In); + if (it != preproc_session_map.end()) { + GAPI_LOG_DEBUG(nullptr, "[" << it->second->session << "] found"); + return pp_session::create(std::static_pointer_cast(it->second)); + } + + // NB: make some sanity checks + IDeviceSelector::DeviceScoreTable devices = acceleration_policy->get_device_selector()->select_devices(); + GAPI_Assert(devices.size() == 1 && "Multiple(or zero) acceleration devices case is unsupported"); + AccelType accel_type = devices.begin()->second.get_type(); + // assign acceleration + if (accel_type == AccelType::DX11) { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; + } else { + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; + } + + // clone existing VPL session to inherit VPL loader configuration + // and avoid refer to any global state + // TODO no clone due to clone issue + + mfxSession mfx_vpp_session = params.handle; + mfxStatus sts = MFX_ERR_NONE; + + // TODO: simply use clone after VPL bug fixing + //sts = MFXCloneSession(params.handle, &mfx_vpp_session); + sts = MFXCreateSession(mfx_handle, impl_number, &mfx_vpp_session); + if (sts != MFX_ERR_NONE) { + GAPI_LOG_WARNING(nullptr, "Cannot clone VPP session, error: " << mfxstatus_to_string(sts)); + GAPI_Assert(false && "Cannot continue VPP preprocessing"); + } + + sts = MFXJoinSession(params.handle, mfx_vpp_session); + if (sts != MFX_ERR_NONE) { + GAPI_LOG_WARNING(nullptr, "Cannot join VPP sessions, error: " << mfxstatus_to_string(sts)); + GAPI_Assert(false && "Cannot continue VPP preprocessing"); + } + + GAPI_LOG_INFO(nullptr, "[" << mfx_vpp_session << "] starting pool allocation"); + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key {}; + try { + // assign HW acceleration processor + acceleration_policy->init(mfx_vpp_session); + try { + // ask to allocate external memory pool + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + sts = MFXVideoVPP_QueryIOSurf(mfx_vpp_session, &mfxVPPParams, vppRequests); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot execute MFXVideoVPP_QueryIOSurf, error: " << + mfxstatus_to_string(sts)); + throw std::runtime_error("Cannot execute MFXVideoVPP_QueryIOSurf"); + } + + // NB: Assign ID as upper limit descendant to distinguish specific VPP allocation + // from decode allocations witch started from 0: by local module convention + + static uint16_t request_id = 0; + vppRequests[1].AllocId = std::numeric_limits::max() - request_id++; + GAPI_Assert(request_id != std::numeric_limits::max() && "Something wrong"); + + vppRequests[1].Type |= MFX_MEMTYPE_FROM_VPPIN; + vpp_out_pool_key = acceleration_policy->create_surface_pool(vppRequests[1], + mfxVPPParams.vpp.Out); + + sts = MFXVideoVPP_Init(mfx_vpp_session, &mfxVPPParams); + if (MFX_ERR_NONE != sts) { + GAPI_LOG_WARNING(nullptr, "cannot Init VPP, error: " << + mfxstatus_to_string(sts)); + // TODO consider deallocate pool + // but not necessary now cause every fail processed as GAPI_Assert + throw std::runtime_error("Cannot init VPP, error: " + + mfxstatus_to_string(sts)); + } + } catch (const std::exception&) { + GAPI_LOG_WARNING(nullptr, "[" << mfx_vpp_session << "] allocation failed, rollback"); + acceleration_policy->deinit(mfx_vpp_session); + throw; + } + } catch (const std::exception&) { + MFXClose(mfx_vpp_session); + GAPI_Assert(false && "Cannot init preproc resources"); + } + + // create engine session after all + session_ptr_type sess_ptr = register_session(mfx_vpp_session, + mfxVPPParams); + sess_ptr->init_surface_pool(vpp_out_pool_key); + sess_ptr->swap_surface(*this); + + bool inserted = preproc_session_map.emplace(mfxVPPParams.vpp.In, sess_ptr).second; + GAPI_Assert(inserted && "preproc session is exist"); + GAPI_LOG_INFO(nullptr, "VPPPreprocSession created, total sessions: " << preproc_session_map.size()); + return pp_session::create(std::static_pointer_cast(sess_ptr)); +} + +void VPPPreprocEngine::on_frame_ready(session_type& sess, + mfxFrameSurface1* ready_surface) +{ + GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "], frame ready"); + + // manage memory ownership rely on acceleration policy + ready_surface->Data.Locked--; // TODO -S- workaround + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; + auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_pool_id, + args); + ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); + + // pop away synced out object + sess.vpp_out_queue.pop(); +} + +VPPPreprocEngine::session_ptr +VPPPreprocEngine::initialize_session(mfxSession, + const std::vector&, + std::shared_ptr) { + return {}; +} + +cv::MediaFrame VPPPreprocEngine::run_sync(const pp_session& sess, const cv::MediaFrame& in_frame, + const cv::util::optional &roi) { + vpp_pp_session pp_sess_impl = sess.get(); + if (!pp_sess_impl.handle) { + // bypass case + return in_frame; + } + session_ptr_type s = std::static_pointer_cast(pp_sess_impl.handle); + GAPI_DbgAssert(s && "Session is nullptr"); + GAPI_DbgAssert(is_applicable(in_frame) && + "VPP preproc is not applicable for the given frame"); + BaseFrameAdapter *vpl_adapter = in_frame.get(); + if (!vpl_adapter) { + GAPI_LOG_WARNING(nullptr, "VPP preproc is inapplicable for a given frame. " + "Make sure the frame is collected using onevpl::GSource"); + throw std::runtime_error("VPP preproc is inapplicable for given frame"); + } + + // schedule decoded surface into preproc queue + session_type::incoming_task in_preproc_request {nullptr, + vpl_adapter->get_surface()->get_handle(), + vpl_adapter->get_surface()->get_info(), + in_frame, + roi}; + s->sync_in_queue.emplace(in_preproc_request); + + // invoke pipeline to transform decoded surface into preprocessed surface + try + { + ExecutionStatus status = ExecutionStatus::Continue; + while (0 == get_ready_frames_count() && + status == ExecutionStatus::Continue) { + status = process(s->session); + } + + if (get_ready_frames_count() == 0) { + GAPI_LOG_WARNING(nullptr, "failed: cannot obtain preprocessed frames, last status: " << + ProcessingEngineBase::status_to_string(status)); + throw std::runtime_error("cannot finalize VPP preprocessing operation"); + } + } catch(const std::exception&) { + throw; + } + // obtain new frame is available + cv::gapi::wip::Data data; + get_frame(data); + preprocessed_frames_count++; + GAPI_LOG_DEBUG(nullptr, "processed frames count: " << preprocessed_frames_count); + return cv::util::get(data); +} + +ProcessingEngineBase::ExecutionStatus VPPPreprocEngine::process_error(mfxStatus status, session_type& sess) { + GAPI_LOG_DEBUG(nullptr, "status: " << mfxstatus_to_string(status)); + + switch (status) { + case MFX_ERR_NONE: + { + // prepare sync object for new surface + try { + sess.swap_surface(*this); + return ExecutionStatus::Continue; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); + return ExecutionStatus::Continue; // read more data + } + } + case MFX_ERR_MORE_DATA: // The function requires more bitstream at input before decoding can proceed + return ExecutionStatus::Processed; + case MFX_ERR_MORE_SURFACE: + { + // The function requires more frame surface at output before decoding can proceed. + // This applies to external memory allocations and should not be expected for + // a simple internal allocation case like this + try { + sess.swap_surface(*this); + return ExecutionStatus::Continue; + } catch (const std::runtime_error& ex) { + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] error: " << ex.what()); + return ExecutionStatus::Continue; // read more data + } + break; + } + case MFX_ERR_DEVICE_LOST: + // For non-CPU implementations, + // Cleanup if device is lost + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_DEVICE_LOST is not processed"); + break; + case MFX_WRN_DEVICE_BUSY: + // For non-CPU implementations, + // Wait a few milliseconds then try again + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_WRN_DEVICE_BUSY is not processed"); + break; + case MFX_WRN_VIDEO_PARAM_CHANGED: + // The decoder detected a new sequence header in the bitstream. + // Video parameters may have changed. + // In external memory allocation case, might need to reallocate the output surface + GAPI_LOG_WARNING(nullptr, "[" << sess.session << "] got MFX_WRN_VIDEO_PARAM_CHANGED"); + return ExecutionStatus::Continue; + break; + case MFX_ERR_INCOMPATIBLE_VIDEO_PARAM: + // The function detected that video parameters provided by the application + // are incompatible with initialization parameters. + // The application should close the component and then reinitialize it + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_INCOMPATIBLE_VIDEO_PARAM is not processed"); + break; + case MFX_ERR_REALLOC_SURFACE: + // Bigger surface_work required. May be returned only if + // mfxInfoMFX::EnableReallocRequest was set to ON during initialization. + // This applies to external memory allocations and should not be expected for + // a simple internal allocation case like this + GAPI_DbgAssert(false && "VPPPreprocEngine::process_error - " + "MFX_ERR_REALLOC_SURFACE is not processed"); + break; + case MFX_WRN_IN_EXECUTION: + GAPI_LOG_DEBUG(nullptr, "[" << sess.session << "] got MFX_WRN_IN_EXECUTION"); + return ExecutionStatus::Continue; + default: + GAPI_LOG_WARNING(nullptr, "Unknown status code: " << mfxstatus_to_string(status) << + ", decoded frames: " << sess.preprocessed_frames_count); + break; + } + + return ExecutionStatus::Failed; +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp new file mode 100644 index 0000000000..b1d0cee264 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_engine.hpp @@ -0,0 +1,68 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP +#define GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP +#include +#include +#include + +#include "streaming/onevpl/engine/processing_engine_base.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" + +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +// GAPI_EXPORTS for tests +struct GAPI_EXPORTS FrameInfoComparator { + bool operator()(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs) const; + static bool equal_to(const mfxFrameInfo& lhs, const mfxFrameInfo& rhs); +}; + +class VPPPreprocSession; +struct IDataProvider; +struct VPLAccelerationPolicy; + +// GAPI_EXPORTS for tests +class GAPI_EXPORTS VPPPreprocEngine final : public ProcessingEngineBase, + public cv::gapi::wip::IPreprocEngine { +public: + using session_type = VPPPreprocSession; + using session_ptr_type = std::shared_ptr; + + VPPPreprocEngine(std::unique_ptr&& accel); + + cv::util::optional is_applicable(const cv::MediaFrame& in_frame) override; + + pp_session initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) override; + + cv::MediaFrame run_sync(const pp_session &session_handle, + const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi) override; + +private: + std::map preproc_session_map; + void on_frame_ready(session_type& sess, + mfxFrameSurface1* ready_surface); + ExecutionStatus process_error(mfxStatus status, session_type& sess); + session_ptr initialize_session(mfxSession mfx_session, + const std::vector& cfg_params, + std::shared_ptr provider) override; + size_t preprocessed_frames_count; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_PREPROC_ENGINE_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp new file mode 100644 index 0000000000..2695a26049 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.cpp @@ -0,0 +1,85 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#include +#include + +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/utils.hpp" +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +VPPPreprocSession::VPPPreprocSession(mfxSession sess, const mfxVideoParam& vpp_out_param) : + EngineSession(sess), + mfx_vpp_out_param(vpp_out_param), + processing_surface_ptr(), + sync_in_queue(), + vpp_out_queue(), + preprocessed_frames_count() +{ +} + +VPPPreprocSession::~VPPPreprocSession() { + GAPI_LOG_INFO(nullptr, "Close VPP for session: " << session); + MFXVideoVPP_Close(session); +} + +Data::Meta VPPPreprocSession::generate_frame_meta() { + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + Data::Meta meta { + {cv::gapi::streaming::meta_tag::timestamp, int64_t{dur.count()} }, + {cv::gapi::streaming::meta_tag::seq_id, int64_t{preprocessed_frames_count++}} + }; + return meta; +} + +void VPPPreprocSession::swap_surface(VPPPreprocEngine& engine) { + VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); + GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); + request_free_surface(session, vpp_pool_id, *acceleration_policy, + processing_surface_ptr, true); +} + +void VPPPreprocSession::init_surface_pool(VPLAccelerationPolicy::pool_key_t key) { + GAPI_Assert(key && "Init preproc pull with empty key"); + vpp_pool_id = key; +} + +const mfxFrameInfo& VPPPreprocSession::get_video_param() const { + return mfx_vpp_out_param.vpp.Out; +} + +VPPPreprocSession::outgoing_task::outgoing_task(mfxSyncPoint acquired_sync_handle, + mfxFrameSurface1* acquired_surface_ptr, + VPPPreprocSession::incoming_task &&in) : + sync_handle(acquired_sync_handle), + vpp_surface_ptr(acquired_surface_ptr), + original_surface_ptr(in.decoded_surface_ptr), + original_frame_info(std::move(in.decoded_frame_info)), + original_frame(in.decoded_frame_copy) { +} + +void VPPPreprocSession::outgoing_task::release_frame() { + // restore initial surface params + memcpy(&(original_surface_ptr->Info), + &original_frame_info, sizeof(Surface::info_t)); + // release references on frame adapter + original_frame = cv::MediaFrame(); +} +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp new file mode 100644 index 0000000000..b6800c3f76 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/preproc_session.hpp @@ -0,0 +1,73 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP +#define GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP +#include +#include + +#include +#include "streaming/onevpl/engine/engine_session.hpp" +#include "streaming/onevpl/accelerators/accel_policy_interface.hpp" +#include "streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp" + +#ifdef HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +class VPPPreprocEngine; + +class VPPPreprocSession : public EngineSession { +public: + friend class VPPPreprocEngine; + VPPPreprocSession(mfxSession sess, const mfxVideoParam &vpp_out_param); + ~VPPPreprocSession(); + + Data::Meta generate_frame_meta(); + void swap_surface(VPPPreprocEngine& engine); + void init_surface_pool(VPLAccelerationPolicy::pool_key_t key); + + virtual const mfxFrameInfo& get_video_param() const override; +private: + mfxVideoParam mfx_vpp_out_param; + VPLAccelerationPolicy::pool_key_t vpp_pool_id; + std::weak_ptr processing_surface_ptr; + + struct incoming_task { + mfxSyncPoint sync_handle; + mfxFrameSurface1* decoded_surface_ptr; + Surface::info_t decoded_frame_info; + cv::MediaFrame decoded_frame_copy; + cv::util::optional roi; + }; + + struct outgoing_task { + outgoing_task() = default; + outgoing_task(mfxSyncPoint acquired_sync_handle, + mfxFrameSurface1* acquired_surface_ptr, + incoming_task &&in); + mfxSyncPoint sync_handle; + mfxFrameSurface1* vpp_surface_ptr; + + mfxFrameSurface1* original_surface_ptr; + void release_frame(); + private: + Surface::info_t original_frame_info; + cv::MediaFrame original_frame; + }; + + std::queue sync_in_queue; + std::queue vpp_out_queue; + int64_t preprocessed_frames_count; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // GAPI_STREAMING_ONVPL_PREPROC_SESSION_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp new file mode 100644 index 0000000000..6cf7212f3e --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.cpp @@ -0,0 +1,86 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include + +#include "streaming/onevpl/engine/preproc/utils.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +namespace utils { + +cv::MediaFormat fourcc_to_MediaFormat(int value) { + switch (value) + { + case MFX_FOURCC_BGRP: + return cv::MediaFormat::BGR; + case MFX_FOURCC_NV12: + return cv::MediaFormat::NV12; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported FourCC format requested: " << value << + ". Cannot cast to cv::MediaFrame"); + GAPI_Assert(false && "Unsupported FOURCC"); + + } +} + +int MediaFormat_to_fourcc(cv::MediaFormat value) { + switch (value) + { + case cv::MediaFormat::BGR: + return MFX_FOURCC_BGRP; + case cv::MediaFormat::NV12: + return MFX_FOURCC_NV12; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported cv::MediaFormat format requested: " << + static_cast::type>(value) << + ". Cannot cast to FourCC"); + GAPI_Assert(false && "Unsupported cv::MediaFormat"); + } +} +int MediaFormat_to_chroma(cv::MediaFormat value) { + switch (value) + { + case cv::MediaFormat::BGR: + return MFX_CHROMAFORMAT_MONOCHROME; + case cv::MediaFormat::NV12: + return MFX_CHROMAFORMAT_YUV420; + default: + GAPI_LOG_WARNING(nullptr, "Unsupported cv::MediaFormat format requested: " << + static_cast::type>(value) << + ". Cannot cast to ChromaFormateIdc"); + GAPI_Assert(false && "Unsupported cv::MediaFormat"); + } +} + +mfxFrameInfo to_mfxFrameInfo(const cv::GFrameDesc& frame_info) { + mfxFrameInfo ret {0}; + ret.FourCC = MediaFormat_to_fourcc(frame_info.fmt); + ret.ChromaFormat = MediaFormat_to_chroma(frame_info.fmt); + ret.Width = frame_info.size.width; + ret.Height = frame_info.size.height; + ret.CropX = 0; + ret.CropY = 0; + ret.CropW = 0; + ret.CropH = 0; + ret.PicStruct = MFX_PICSTRUCT_UNKNOWN; + ret.FrameRateExtN = 0; + ret.FrameRateExtD = 0; + return ret; +} +} // namespace utils +} // namespace cv +} // namespace gapi +} // namespace wip +} // namespace onevpl + +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp new file mode 100644 index 0000000000..b52a8ad1e8 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/utils.hpp @@ -0,0 +1,32 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP +#define GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" + +#include + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +namespace utils { + +cv::MediaFormat fourcc_to_MediaFormat(int value); +int MediaFormat_to_fourcc(cv::MediaFormat value); +int MediaFormat_to_chroma(cv::MediaFormat value); + +mfxFrameInfo to_mfxFrameInfo(const cv::GFrameDesc& frame_info); +} // namespace utils +} // namespace cv +} // namespace gapi +} // namespace wip +} // namespace onevpl +#endif // #ifdef HAVE_ONEVPL +#endif // GAPI_STREAMING_ONEVPL_PREPROC_UTILS_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp new file mode 100644 index 0000000000..780c9cf5d7 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp @@ -0,0 +1,33 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifdef HAVE_ONEVPL + +#ifndef VPP_PREPROC_ENGINE +#define VPP_PREPROC_ENGINE +#include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/engine/engine_session.hpp" + +namespace cv { +namespace gapi { +namespace wip { +namespace onevpl { +struct vpp_pp_params { + mfxSession handle; + mfxFrameInfo info; + void *reserved = nullptr; +}; + +struct vpp_pp_session { + std::shared_ptr handle; + void *reserved = nullptr; +}; +} // namespace onevpl +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // VPP_PREPROC_ENGINE +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp new file mode 100644 index 0000000000..be215fec74 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_defines.hpp @@ -0,0 +1,88 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP +#define GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/utils.hpp" +#include "streaming/onevpl/engine/preproc/vpp_preproc_defines.hpp" +#endif // HAVE_ONEVPL + + +namespace cv { +namespace gapi { +namespace wip { + +#ifdef VPP_PREPROC_ENGINE +#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::onevpl::vpp_pp_params +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::onevpl::vpp_pp_session +#else // VPP_PREPROC_ENGINE +struct empty_pp_params {}; +struct empty_pp_session {}; +#define GAPI_BACKEND_PP_PARAMS cv::gapi::wip::empty_pp_params +#define GAPI_BACKEND_PP_SESSIONS cv::gapi::wip::empty_pp_session +#endif // VPP_PREPROC_ENGINE + +struct pp_params { + using value_type = cv::util::variant; + + template + static pp_params create(Args&& ...args) { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificParamType requested"); + pp_params ret; + ret.value = BackendSpecificParamType{std::forward(args)...}; + return ret; + } + + template + BackendSpecificParamType& get() { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificParamType requested"); + return cv::util::get(value); + } + + template + const BackendSpecificParamType& get() const { + return static_cast(const_cast(this)->get()); + } +private: + value_type value; +}; + +struct pp_session { + using value_type = cv::util::variant; + + template + static pp_session create(Args&& ...args) { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificSesionType requested"); + pp_session ret; + ret.value = BackendSpecificSesionType{std::forward(args)...};; + return ret; + } + + template + BackendSpecificSesionType &get() { + static_assert(cv::detail::contains::value, + "Invalid BackendSpecificSesionType requested"); + return cv::util::get(value); + } + + template + const BackendSpecificSesionType &get() const { + return const_cast(this)->get(); + } +private: + value_type value; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // GAPI_STREAMING_ONEVPL_ENGINE_PREPROC_DEFINES_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp new file mode 100644 index 0000000000..ff9f103b5a --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.cpp @@ -0,0 +1,83 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#include +#include "streaming/onevpl/engine/preproc_engine_interface.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + +#ifdef HAVE_ONEVPL +#include "streaming/onevpl/onevpl_export.hpp" +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" + +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" +#include "streaming/onevpl/cfg_params_parser.hpp" + +#endif //HAVE_ONEVPL + +#include "logger.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +template +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl(const PreprocEngineArgs& ...) { + GAPI_Assert(false && "Unsupported "); +} + +template <> +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl(const onevpl::Device &device, + const onevpl::Context &context) { + using namespace onevpl; + cv::util::suppress_unused_warning(device); + cv::util::suppress_unused_warning(context); + std::unique_ptr dispatcher(new VPPPreprocDispatcher); +#ifdef HAVE_ONEVPL + if (device.get_type() == onevpl::AccelType::DX11) { + bool gpu_pp_is_created = false; +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 + GAPI_LOG_INFO(nullptr, "Creating DX11 VPP preprocessing engine"); + // create GPU VPP preproc engine + dispatcher->insert_worker( + std::unique_ptr{ + new VPLDX11AccelerationPolicy( + std::make_shared( + device, context, CfgParams{})) + }); + GAPI_LOG_INFO(nullptr, "DX11 VPP preprocessing engine created"); + gpu_pp_is_created = true; +#endif +#endif + GAPI_Assert(gpu_pp_is_created && "VPP preproc for GPU is requested, but it is avaiable only for DX11 at now"); + } else { + GAPI_LOG_INFO(nullptr, "Creating CPU VPP preprocessing engine"); + dispatcher->insert_worker( + std::unique_ptr{ + new VPLCPUAccelerationPolicy( + std::make_shared(CfgParams{}))}); + GAPI_LOG_INFO(nullptr, "CPU VPP preprocessing engine created"); + } +#endif // HAVE_ONEVPL + return dispatcher; +} + + +// Force instantiation +template +std::unique_ptr +IPreprocEngine::create_preproc_engine_impl + (const onevpl::Device &device, + const onevpl::Context &ctx); +} // namespace wip +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp new file mode 100644 index 0000000000..72c1dbd0a7 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/engine/preproc_engine_interface.hpp @@ -0,0 +1,46 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + +#ifndef GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP +#define GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP + +#include "precomp.hpp" +#include +#include + +#include "streaming/onevpl/engine/preproc_defines.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +struct IPreprocEngine { + virtual ~IPreprocEngine() = default; + + virtual cv::util::optional + is_applicable(const cv::MediaFrame& in_frame) = 0; + + virtual pp_session + initialize_preproc(const pp_params& initial_frame_param, + const GFrameDesc& required_frame_descr) = 0; + virtual cv::MediaFrame + run_sync(const pp_session &sess, const cv::MediaFrame& in_frame, + const cv::util::optional &opt_roi = {}) = 0; + + template + static std::unique_ptr create_preproc_engine(const PreprocEngineArgs& ...args) { + static_assert(std::is_base_of::value, + "SpecificPreprocEngine must have reachable ancessor IPreprocEngine"); + return create_preproc_engine_impl(args...); + } +private: + template + static std::unique_ptr create_preproc_engine_impl(const PreprocEngineArgs &...args); +}; +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // GAPI_STREAMING_ONEVPL_ENGINE_PROCESSING_ENGINE_INTERFACE_HPP diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp index 36d36d5ec0..6226061925 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.cpp @@ -26,34 +26,29 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { +using vpp_param_storage = const std::map; +using vpp_param_storage_cit = typename vpp_param_storage::const_iterator; template -bool set_vpp_param(const char* name, Type& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session); +Type get_mfx_value(const vpp_param_storage_cit &cit); template<> -bool set_vpp_param(const char* name, uint32_t& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session) { - auto it = params_storage.find(name); - if (it != params_storage.end()) { - auto value = it->second.Data.U32; - GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << - "\": " << value); - out_vpp_param = value; - return true; - } - return false; +uint16_t get_mfx_value(const vpp_param_storage_cit& cit) { + return cit->second.Data.U16; } template<> -bool set_vpp_param(const char* name, uint16_t& out_vpp_param, - const std::map ¶ms_storage, - mfxSession session) { +uint32_t get_mfx_value(const vpp_param_storage_cit& cit) { + return cit->second.Data.U32; +} + +template +bool set_vpp_param(const char* name, Type& out_vpp_param, + const vpp_param_storage ¶ms_storage, + mfxSession session) { auto it = params_storage.find(name); if (it != params_storage.end()) { - auto value = it->second.Data.U16; + auto value = get_mfx_value(it); GAPI_LOG_INFO(nullptr, "[" << session << "] set \"" << name << "\": " << value); out_vpp_param = value; @@ -81,7 +76,6 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr ExecutionStatus @@ -110,11 +104,8 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrDataLength)) - ? my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -122,7 +113,7 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrget_handle() << + my_sess.processing_surface_ptr.lock()->get_handle() << ", dec out surface: " << sync_pair.second << ", status: " << mfxstatus_to_string(my_sess.last_status)); @@ -134,12 +125,12 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptrget_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -257,7 +248,7 @@ VPLLegacyTranscodeEngine::VPLLegacyTranscodeEngine(std::unique_ptr ExecutionStatus { return this->process_error(sess.last_status, static_cast(sess)); @@ -287,11 +278,11 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, // override some in-params if (set_vpp_param(CfgParam::vpp_in_width_name(), mfxVPPParams.vpp.In.Width, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.In.Width = ALIGN16(mfxVPPParams.vpp.In.Width); } if (set_vpp_param(CfgParam::vpp_in_height_name(), mfxVPPParams.vpp.In.Height, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.In.Height = ALIGN16(mfxVPPParams.vpp.In.Height); } set_vpp_param(CfgParam::vpp_in_crop_x_name(), mfxVPPParams.vpp.In.CropX, @@ -309,11 +300,11 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, set_vpp_param(CfgParam::vpp_out_chroma_format_name(), mfxVPPParams.vpp.Out.ChromaFormat, cfg_vpp_params, mfx_session); if (set_vpp_param(CfgParam::vpp_out_width_name(), mfxVPPParams.vpp.Out.Width, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.Out.Width = ALIGN16(mfxVPPParams.vpp.Out.Width); } if (set_vpp_param(CfgParam::vpp_out_height_name(), mfxVPPParams.vpp.Out.Height, - cfg_vpp_params, mfx_session)) { + cfg_vpp_params, mfx_session)) { mfxVPPParams.vpp.Out.Height = ALIGN16(mfxVPPParams.vpp.Out.Height); } set_vpp_param(CfgParam::vpp_out_crop_x_name(), mfxVPPParams.vpp.Out.CropX, @@ -367,7 +358,7 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, } - // NB: Assing ID as upper limit descendant to distinguish specific VPP allocation + // NB: Assign ID as upper limit descendant to distinguish specific VPP allocation // from decode allocations witch started from 0: by local module convention vppRequests[1].AllocId = std::numeric_limits::max(); @@ -394,7 +385,7 @@ VPLLegacyTranscodeEngine::initialize_session(mfxSession mfx_session, sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); // prepare working surfaces - sess_ptr->swap_surface(*this); + sess_ptr->swap_decode_surface(*this); sess_ptr->swap_transcode_surface(*this); return sess_ptr; } @@ -452,10 +443,6 @@ void VPLLegacyTranscodeEngine::validate_vpp_param(const mfxVideoParam& mfxVPPPar GAPI_LOG_INFO(nullptr, "Finished VPP param validation"); } -ProcessingEngineBase::ExecutionStatus VPLLegacyTranscodeEngine::execute_op(operation_t& op, EngineSession& sess) { - return op(sess); -} - void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess, mfxFrameSurface1* ready_surface) { @@ -463,8 +450,10 @@ void VPLLegacyTranscodeEngine::on_frame_ready(LegacyTranscodeSession& sess, // manage memory ownership rely on acceleration policy ready_surface->Data.Locked--; // TODO -S- workaround + + VPLAccelerationPolicy::FrameConstructorArgs args{ready_surface, sess.session}; auto frame_adapter = acceleration_policy->create_frame_adapter(sess.vpp_out_pool_id, - ready_surface); + args); ready_frames.emplace(cv::MediaFrame(std::move(frame_adapter)), sess.generate_frame_meta()); // pop away synced out object diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp index cf0621dd93..d06b76a13d 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp @@ -33,8 +33,6 @@ public: static std::map get_vpp_params(const std::vector &cfg_params); private: - ExecutionStatus execute_op(operation_t& op, EngineSession& sess) override; - void on_frame_ready(LegacyTranscodeSession& sess, mfxFrameSurface1* ready_surface); void validate_vpp_param(const mfxVideoParam& mfxVPPParams); diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp index 9fcabc7e10..8672a000ad 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.cpp @@ -42,22 +42,7 @@ void LegacyTranscodeSession::init_transcode_surface_pool(VPLAccelerationPolicy:: void LegacyTranscodeSession::swap_transcode_surface(VPLLegacyTranscodeEngine& engine) { VPLAccelerationPolicy* acceleration_policy = engine.get_accel(); GAPI_Assert(acceleration_policy && "Empty acceleration_policy"); - try { - auto cand = acceleration_policy->get_free_surface(vpp_out_pool_id).lock(); - - GAPI_LOG_DEBUG(nullptr, "[" << session << "] swap surface" - ", old: " << (!vpp_surface_ptr.expired() - ? vpp_surface_ptr.lock()->get_handle() - : nullptr) << - ", new: "<< cand->get_handle()); - - vpp_surface_ptr = cand; - } catch (const std::runtime_error& ex) { - GAPI_LOG_WARNING(nullptr, "[" << session << "] error: " << ex.what()); - - // Delegate exception processing on caller - throw; - } + request_free_surface(session, vpp_out_pool_id, *acceleration_policy, vpp_surface_ptr); } const mfxFrameInfo& LegacyTranscodeSession::get_video_param() const { diff --git a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp index aa6f70c587..ad9ed8e60b 100644 --- a/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp +++ b/modules/gapi/src/streaming/onevpl/engine/transcode/transcode_session.hpp @@ -14,10 +14,7 @@ namespace cv { namespace gapi { namespace wip { namespace onevpl { - -struct IDataProvider; class Surface; -struct VPLAccelerationPolicy; class GAPI_EXPORTS LegacyTranscodeSession : public LegacyDecodeSession { public: @@ -33,8 +30,8 @@ public: const mfxFrameInfo& get_video_param() const override; private: mfxVideoParam mfx_transcoder_param; - VPLAccelerationPolicy::pool_key_t vpp_out_pool_id; + std::weak_ptr vpp_surface_ptr; std::queue vpp_queue; }; diff --git a/modules/gapi/src/streaming/onevpl/onevpl_export.hpp b/modules/gapi/src/streaming/onevpl/onevpl_export.hpp index 44970ee7be..e93a30d869 100644 --- a/modules/gapi/src/streaming/onevpl/onevpl_export.hpp +++ b/modules/gapi/src/streaming/onevpl/onevpl_export.hpp @@ -16,6 +16,9 @@ #include #include + +extern mfxLoader mfx_handle; +extern int impl_number; #endif // HAVE_ONEVPL #if defined(_MSC_VER) diff --git a/modules/gapi/src/streaming/onevpl/source.cpp b/modules/gapi/src/streaming/onevpl/source.cpp index e5b045188d..3bad463e41 100644 --- a/modules/gapi/src/streaming/onevpl/source.cpp +++ b/modules/gapi/src/streaming/onevpl/source.cpp @@ -33,6 +33,13 @@ GSource::GSource(const std::string& filePath, accel_ctx_ptr, cfg_params)) { } +GSource::GSource(const std::string& filePath, + const CfgParams& cfg_params, + const Device &device, const Context &ctx) : + GSource(filePath, cfg_params, + std::make_shared(device, ctx, cfg_params)) { +} + GSource::GSource(const std::string& filePath, const CfgParams& cfg_params, std::shared_ptr selector) : @@ -74,6 +81,10 @@ GSource::GSource(const std::string&, const CfgParams&, const std::string&, GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); } +GSource::GSource(const std::string&, const CfgParams&, const Device &, const Context &) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} + GSource::GSource(const std::string&, const CfgParams&, std::shared_ptr) { GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); } diff --git a/modules/gapi/src/streaming/onevpl/source_priv.cpp b/modules/gapi/src/streaming/onevpl/source_priv.cpp index d1ff15b06d..7afe1cde56 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.cpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.cpp @@ -36,6 +36,10 @@ GMetaArg GSource::Priv::descr_of() const { #else // HAVE_ONEVPL +// TODO global variable move it into Source after CloneSession issue resolving +mfxLoader mfx_handle = MFXLoad(); +int impl_number = 0; + namespace cv { namespace gapi { namespace wip { @@ -47,7 +51,7 @@ enum { }; GSource::Priv::Priv() : - mfx_handle(MFXLoad()), +// mfx_handle(MFXLoad()), mfx_impl_description(), mfx_handle_configs(), cfg_params(), @@ -125,7 +129,7 @@ GSource::Priv::Priv(std::shared_ptr provider, } // collect optional-preferred input parameters from input params - // which may (optionally) or may not be used to choose the most preferrable + // which may (optionally) or may not be used to choose the most preferable // VPL implementation (for example, specific API version or Debug/Release VPL build) std::vector preferred_params; std::copy_if(cfg_params.begin(), cfg_params.end(), std::back_inserter(preferred_params), @@ -133,7 +137,7 @@ GSource::Priv::Priv(std::shared_ptr provider, std::sort(preferred_params.begin(), preferred_params.end()); GAPI_LOG_DEBUG(nullptr, "Find MFX better implementation from handle: " << mfx_handle << - " is satisfying preferrable params count: " << preferred_params.size()); + " is satisfying preferable params count: " << preferred_params.size()); int i = 0; mfxImplDescription *idesc = nullptr; std::vector available_impl_descriptions; @@ -158,7 +162,7 @@ GSource::Priv::Priv(std::shared_ptr provider, GAPI_LOG_INFO(nullptr, "Implementation index: " << i << "\n" << ss.str()); // Only one VPL implementation is required for GSource here. - // Let's find intersection params from available impl with preferrable input params + // Let's find intersection params from available impl with preferable input params // to find best match. // An available VPL implementation with max matching count std::vector impl_params = get_params_from_string(ss.str()); @@ -174,7 +178,7 @@ GSource::Priv::Priv(std::shared_ptr provider, // in case of no input preferrance we consider all params are matched // for the first available VPL implementation. It will be a chosen one matches_count.emplace(impl_params.size(), i++); - GAPI_LOG_DEBUG(nullptr, "No preferrable params, use the first one implementation"); + GAPI_LOG_DEBUG(nullptr, "No preferable params, use the first one implementation"); break; } else { GAPI_LOG_DEBUG(nullptr, "Equal param intersection count: " << matched_params.size()); @@ -187,7 +191,8 @@ GSource::Priv::Priv(std::shared_ptr provider, GAPI_Assert(max_match_it != matches_count.rend() && "Cannot find matched MFX implementation for requested configuration"); - int impl_number = max_match_it->second; + // TODO impl_number is global for now + impl_number = max_match_it->second; GAPI_LOG_INFO(nullptr, "Chosen implementation index: " << impl_number); // release unusable impl available_impl_descriptions @@ -261,7 +266,7 @@ GSource::Priv::~Priv() { GAPI_LOG_INFO(nullptr, "Unload MFX implementation description: " << mfx_impl_description); MFXDispReleaseImplDescription(mfx_handle, mfx_impl_description); GAPI_LOG_INFO(nullptr, "Unload MFX handle: " << mfx_handle); - MFXUnload(mfx_handle); + //MFXUnload(mfx_handle); } std::unique_ptr GSource::Priv::initializeHWAccel(std::shared_ptr selector) diff --git a/modules/gapi/src/streaming/onevpl/source_priv.hpp b/modules/gapi/src/streaming/onevpl/source_priv.hpp index b835850d35..21248df7fb 100644 --- a/modules/gapi/src/streaming/onevpl/source_priv.hpp +++ b/modules/gapi/src/streaming/onevpl/source_priv.hpp @@ -44,7 +44,8 @@ private: Priv(); std::unique_ptr initializeHWAccel(std::shared_ptr selector); - mfxLoader mfx_handle; + // TODO not it is global variable. Waiting for FIX issue with CloneSession + // mfxLoader mfx_handle; mfxImplDescription *mfx_impl_description; std::vector mfx_handle_configs; std::vector cfg_params; diff --git a/modules/gapi/src/streaming/onevpl/utils.cpp b/modules/gapi/src/streaming/onevpl/utils.cpp index 3ec0dea8ae..549ff597cf 100644 --- a/modules/gapi/src/streaming/onevpl/utils.cpp +++ b/modules/gapi/src/streaming/onevpl/utils.cpp @@ -25,6 +25,8 @@ #define APPEND_STRINGIFY_MASK_N_ERASE(value, pref, mask) \ if (value & mask) { ss << pref << #mask; value ^= mask; } +#define DUMP_MEMBER(stream, object, member) \ + stream << #member << ": " << object.member << "\n"; namespace cv { namespace gapi { @@ -359,11 +361,47 @@ std::string mfxstatus_to_string(mfxStatus err) { return ret; } +std::string mfx_frame_info_to_string(const mfxFrameInfo &info) { + std::stringstream ss; + DUMP_MEMBER(ss, info, FrameRateExtN) + DUMP_MEMBER(ss, info, FrameRateExtD) + DUMP_MEMBER(ss, info, AspectRatioW) + DUMP_MEMBER(ss, info, AspectRatioH) + DUMP_MEMBER(ss, info, CropX) + DUMP_MEMBER(ss, info, CropY) + DUMP_MEMBER(ss, info, CropW) + DUMP_MEMBER(ss, info, CropH) + DUMP_MEMBER(ss, info, ChannelId) + DUMP_MEMBER(ss, info, BitDepthLuma) + DUMP_MEMBER(ss, info, BitDepthChroma) + DUMP_MEMBER(ss, info, Shift) + DUMP_MEMBER(ss, info, FourCC) + DUMP_MEMBER(ss, info, Width) + DUMP_MEMBER(ss, info, Height) + DUMP_MEMBER(ss, info, BufferSize) + DUMP_MEMBER(ss, info, PicStruct) + DUMP_MEMBER(ss, info, ChromaFormat); + return ss.str(); +} + +int compare(const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + //NB: mfxFrameInfo is a `packed` struct declared in VPL + return memcmp(&lhs, &rhs, sizeof(mfxFrameInfo)); +} + +bool operator< (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + return (compare(lhs, rhs) < 0); +} + +bool operator== (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs) { + return (compare(lhs, rhs) == 0); +} + std::string ext_mem_frame_type_to_cstr(int type) { std::stringstream ss; APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_DECODER_TARGET); APPEND_STRINGIFY_MASK_N_ERASE(type, "|", MFX_MEMTYPE_DXVA2_PROCESSOR_TARGET); - // NB: accoring to VPL source the commented MFX_* constane below are belong to the + // NB: according to VPL source the commented MFX_* constane below are belong to the // same actual integral value as condition abobe. So it is impossible // to distinct them in condition branch. Just put this comment and possible // constans here... diff --git a/modules/gapi/src/streaming/onevpl/utils.hpp b/modules/gapi/src/streaming/onevpl/utils.hpp index 76a66a63f4..459f612743 100644 --- a/modules/gapi/src/streaming/onevpl/utils.hpp +++ b/modules/gapi/src/streaming/onevpl/utils.hpp @@ -76,6 +76,10 @@ mfxU32 cstr_to_mfx_version(const char* cstr); std::string GAPI_EXPORTS mfxstatus_to_string(int64_t err); std::string GAPI_EXPORTS mfxstatus_to_string(mfxStatus err); +std::string mfx_frame_info_to_string(const mfxFrameInfo &info); +bool operator< (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs); +bool operator== (const mfxFrameInfo &lhs, const mfxFrameInfo &rhs); + std::ostream& operator<< (std::ostream& out, const mfxImplDescription& idesc); std::string ext_mem_frame_type_to_cstr(int type); diff --git a/modules/gapi/src/utils/itt.hpp b/modules/gapi/src/utils/itt.hpp index 0b49af7226..4a9cd54d65 100644 --- a/modules/gapi/src/utils/itt.hpp +++ b/modules/gapi/src/utils/itt.hpp @@ -13,7 +13,7 @@ #include -// NOTE: OPENCV_WITH_ITT is only defined if ITT dependecy is built by OpenCV infrastructure. +// NOTE: OPENCV_WITH_ITT is only defined if ITT dependency is built by OpenCV infrastructure. // There will not be such define in G-API standalone mode. // TODO: Consider using OpenCV's trace.hpp #if defined(OPENCV_WITH_ITT) diff --git a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp index 239afc38c4..b462e701f2 100644 --- a/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp +++ b/modules/gapi/test/cpu/gapi_ocv_stateful_kernel_tests.cpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2022 Intel Corporation #include "gapi_ocv_stateful_kernel_test_utils.hpp" #include @@ -14,6 +14,7 @@ #include #endif +#include // required by std::shared_ptr namespace opencv_test { @@ -21,6 +22,11 @@ namespace opencv_test { std::string method; }; + + struct CountStateSetupsParams + { + std::shared_ptr pSetupsCount; + }; } // namespace opencv_test namespace cv @@ -34,6 +40,14 @@ namespace cv return "org.opencv.test.background_substractor_state_params"; } }; + + template<> struct CompileArgTag + { + static const char* tag() + { + return "org.opencv.test.count_state_setups_params"; + } + }; } // namespace detail } // namespace cv @@ -127,8 +141,101 @@ namespace } }; #endif + + G_TYPED_KERNEL(GCountStateSetups, (GMat)>, + "org.opencv.test.count_state_setups") + { + static GOpaqueDesc outMeta(GMatDesc /* in */) { return empty_gopaque_desc(); } + }; + + GAPI_OCV_KERNEL_ST(GOCVCountStateSetups, GCountStateSetups, int) + { + static void setup(const cv::GMatDesc &, std::shared_ptr &, + const cv::GCompileArgs &compileArgs) + { + auto params = cv::gapi::getCompileArg(compileArgs) + .value_or(CountStateSetupsParams { }); + if (params.pSetupsCount != nullptr) { + (*params.pSetupsCount)++; + } + } + + static void run(const cv::Mat & , bool &out, int &) + { + out = true; + } + }; }; +TEST(StatefulKernel, StateInitOnceInRegularMode) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + // Testing for 100 frames + bool result { }; + for (int i = 0; i < 100; ++i) { + c.apply(cv::gin(inputData), cv::gout(result), + cv::compile_args(cv::gapi::kernels(), params)); + EXPECT_TRUE(result); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } +}; + +struct StateInitOnce : public ::testing::TestWithParam{}; +TEST_P(StateInitOnce, StreamingCompiledWithMeta) +{ + bool compileWithMeta = GetParam(); + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + // Compilation & testing + auto ccomp = (compileWithMeta) + ? c.compileStreaming(cv::descr_of(inputData), + cv::compile_args(cv::gapi::kernels(), + params)) + : c.compileStreaming( + cv::compile_args(cv::gapi::kernels(), + params)); + + ccomp.setSource(cv::gin(inputData)); + + ccomp.start(); + EXPECT_TRUE(ccomp.running()); + + int counter { }; + bool result; + // Process mat 100 times + while (ccomp.pull(cv::gout(result)) && (counter++ < 100)) { + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } + + ccomp.stop(); + EXPECT_FALSE(ccomp.running()); +} + +INSTANTIATE_TEST_CASE_P(StatefulKernel, StateInitOnce, ::testing::Bool()); + TEST(StatefulKernel, StateIsMutableInRuntime) { constexpr int expectedCallsCount = 10; @@ -163,7 +270,43 @@ TEST(StatefulKernel, StateIsMutableInRuntime) } -TEST(StatefulKernel, StateIsAutoResetForNewStream) +TEST(StateIsResetOnNewStream, RegularMode) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // Input mat: + cv::Mat inputData(1080, 1920, CV_8UC1); + cv::randu(inputData, cv::Scalar::all(1), cv::Scalar::all(128)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + auto setupsCounter = c.compile(cv::descr_of(inputData), + cv::compile_args(cv::gapi::kernels(), + params)); + + bool result { }; + for (int i = 0; i < 2; ++i) { + setupsCounter(cv::gin(inputData), cv::gout(result)); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + } + + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(1, *params.pSetupsCount); + setupsCounter.prepareForNewStream(); + + for (int i = 0; i < 2; ++i) { + setupsCounter(cv::gin(inputData), cv::gout(result)); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(2, *params.pSetupsCount); + } +} + +TEST(StateIsResetOnNewStream, StreamingMode) { cv::GMat in; cv::GOpaque out = GIsStateUpToDate::on(in); @@ -272,7 +415,7 @@ TEST(StatefulKernel, StateIsInitViaCompArgs) // Allowing 1% difference of all pixels between G-API and OpenCV results compareBackSubResults(gapiForeground, ocvForeground, 1); - // Additionally, test the case where state is resetted + // Additionally, test the case where state is reset gapiBackSub.prepareForNewStream(); gapiBackSub(cv::gin(frame), cv::gout(gapiForeground)); pOcvBackSub->apply(frame, ocvForeground); @@ -342,7 +485,118 @@ TEST(StatefulKernel, StateIsInitViaCompArgsInStreaming) // Allowing 5% difference of all pixels between G-API and reference OpenCV results testBackSubInStreaming(gapiBackSub, 5); } + +TEST(StatefulKernel, StateIsChangedViaCompArgsOnReshape) +{ + cv::GMat in; + cv::GComputation comp(in, GBackSub::on(in)); + + const auto pkg = cv::gapi::kernels(); + + // OpenCV reference substractor + auto pOCVBackSubKNN = createBackgroundSubtractorKNN(); + auto pOCVBackSubMOG2 = createBackgroundSubtractorMOG2(); + + const auto run = [&](const std::string& videoPath, const std::string& method) { + auto path = findDataFile(videoPath); + cv::gapi::wip::IStreamSource::Ptr source; + try { + source = gapi::wip::make_src(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + cv::Mat inMat, gapiForeground, ocvForeground; + + for (int i = 0; i < 10; i++) { + cv::gapi::wip::Data inData; + source->pull(inData); + inMat = cv::util::get(inData); + comp.apply(inMat, gapiForeground, + cv::compile_args(pkg, BackSubStateParams{method})); + + if (method == "knn") { + pOCVBackSubKNN->apply(inMat, ocvForeground, -1); + // Allowing 1% difference among all pixels + compareBackSubResults(gapiForeground, ocvForeground, 1); + } else if (method == "mog2") { + pOCVBackSubMOG2->apply(inMat, ocvForeground, -1); + compareBackSubResults(gapiForeground, ocvForeground, 5); + } else { + CV_Assert(false && "Unknown BackSub method"); + } + } + }; + + run("cv/video/768x576.avi", "knn"); + run("cv/video/1920x1080.avi", "mog2"); +} + +TEST(StatefulKernel, StateIsResetOnceOnReshapeInStreaming) +{ + cv::GMat in; + cv::GOpaque out = GCountStateSetups::on(in); + cv::GComputation c(cv::GIn(in), cv::GOut(out)); + + // variable to update when state is initialized in the kernel + CountStateSetupsParams params; + params.pSetupsCount.reset(new int(0)); + + auto ccomp = c.compileStreaming( + cv::compile_args(cv::gapi::kernels(), params)); + + auto run = [&ccomp, ¶ms](const std::string& videoPath, int expectedSetupsCount) { + auto path = findDataFile(videoPath); + try { + ccomp.setSource(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + ccomp.start(); + + int frames = 0; + bool result = false; + while (ccomp.pull(cv::gout(result)) && (frames++ < 10)) { + EXPECT_TRUE(result); + EXPECT_TRUE(params.pSetupsCount != nullptr); + EXPECT_EQ(expectedSetupsCount, *params.pSetupsCount); + } + ccomp.stop(); + }; + + run("cv/video/768x576.avi", 1); + // FIXME: it should be 2, not 3 for expectedSetupsCount here. + // With current implemention both GCPUExecutable reshape() and + // handleNewStream() call setupKernelStates() + run("cv/video/1920x1080.avi", 3); +} #endif + +TEST(StatefulKernel, StateIsAutoResetOnReshape) +{ + cv::GMat in; + cv::GOpaque up_to_date = GIsStateUpToDate::on(in); + cv::GOpaque calls_count = GCountCalls::on(in); + cv::GComputation comp(cv::GIn(in), cv::GOut(up_to_date, calls_count)); + + auto run = [&comp](const cv::Mat& in_mat) { + const auto pkg = cv::gapi::kernels(); + bool stateIsUpToDate = false; + int callsCount = 0; + for (int i = 0; i < 3; i++) { + comp.apply(cv::gin(in_mat), cv::gout(stateIsUpToDate, callsCount), + cv::compile_args(pkg)); + EXPECT_TRUE(stateIsUpToDate); + EXPECT_EQ(i+1, callsCount); + } + }; + + cv::Mat in_mat1(32, 32, CV_8UC1); + run(in_mat1); + + cv::Mat in_mat2(16, 16, CV_8UC1); + run(in_mat2); +} + //------------------------------------------------------------------------------------------------------------- diff --git a/modules/gapi/test/gapi_sample_pipelines.cpp b/modules/gapi/test/gapi_sample_pipelines.cpp index b4d9f3e100..da71cd0ab0 100644 --- a/modules/gapi/test/gapi_sample_pipelines.cpp +++ b/modules/gapi/test/gapi_sample_pipelines.cpp @@ -49,7 +49,25 @@ namespace static GMatDesc outMeta(GMatDesc in) { return in; } }; - // These definitons test the correct macro work if the kernel has multiple output values + G_TYPED_KERNEL(GZeros, , "org.opencv.test.zeros") + { + static GMatDesc outMeta(GMatDesc /*in*/, GMatDesc user_desc) + { + return user_desc; + } + }; + + GAPI_OCV_KERNEL(GOCVZeros, GZeros) + { + static void run(const cv::Mat& /*in*/, + const cv::GMatDesc& /*desc*/, + cv::Mat& out) + { + out.setTo(0); + } + }; + + // These definitions test the correct macro work if the kernel has multiple output values G_TYPED_KERNEL(GRetGArrayTupleOfGMat2Kernel, >(GMat, Scalar)>, "org.opencv.test.retarrayoftupleofgmat2kernel") {}; G_TYPED_KERNEL(GRetGArraTupleyOfGMat3Kernel, >(GMat)>, "org.opencv.test.retarrayoftupleofgmat3kernel") {}; G_TYPED_KERNEL(GRetGArraTupleyOfGMat4Kernel, >(GMat)>, "org.opencv.test.retarrayoftupleofgmat4kernel") {}; @@ -430,4 +448,69 @@ TEST(GAPI_Pipeline, ReplaceDefaultByFunctor) EXPECT_TRUE(f.is_called); } +TEST(GAPI_Pipeline, GraphOutputIs1DMat) +{ + int dim = 100; + cv::Mat in_mat(1, 1, CV_8UC3); + cv::Mat out_mat; + + cv::GMat in; + auto cc = cv::GComputation(in, GZeros::on(in, cv::GMatDesc(CV_8U, {dim}))) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + // NB: Computation is able to write 1D output cv::Mat to empty out_mat. + ASSERT_NO_THROW(cc(cv::gin(in_mat), cv::gout(out_mat))); + ASSERT_EQ(1, out_mat.size.dims()); + ASSERT_EQ(dim, out_mat.size[0]); + + // NB: Computation is able to write 1D output cv::Mat + // to pre-allocated with the same meta out_mat. + ASSERT_NO_THROW(cc(cv::gin(in_mat), cv::gout(out_mat))); + ASSERT_EQ(1, out_mat.size.dims()); + ASSERT_EQ(dim, out_mat.size[0]); +} + +TEST(GAPI_Pipeline, 1DMatBetweenIslands) +{ + int dim = 100; + cv::Mat in_mat(1, 1, CV_8UC3); + cv::Mat out_mat; + + cv::Mat ref_mat({dim}, CV_8U); + ref_mat.dims = 1; + ref_mat.setTo(0); + + cv::GMat in; + auto out = cv::gapi::copy(GZeros::on(cv::gapi::copy(in), cv::GMatDesc(CV_8U, {dim}))); + auto cc = cv::GComputation(in, out) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + cc(cv::gin(in_mat), cv::gout(out_mat)); + + EXPECT_EQ(0, cv::norm(out_mat, ref_mat)); +} + +TEST(GAPI_Pipeline, 1DMatWithinSingleIsland) +{ + int dim = 100; + cv::Size blur_sz(3, 3); + cv::Mat in_mat(10, 10, CV_8UC3); + cv::randu(in_mat, 0, 255); + cv::Mat out_mat; + + cv::Mat ref_mat({dim}, CV_8U); + ref_mat.dims = 1; + ref_mat.setTo(0); + + cv::GMat in; + auto out = cv::gapi::blur( + GZeros::on(cv::gapi::blur(in, blur_sz), cv::GMatDesc(CV_8U, {dim})), blur_sz); + auto cc = cv::GComputation(in, out) + .compile(cv::descr_of(in_mat), cv::compile_args(cv::gapi::kernels())); + + cc(cv::gin(in_mat), cv::gout(out_mat)); + + EXPECT_EQ(0, cv::norm(out_mat, ref_mat)); +} + } // namespace opencv_test diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 8dc23a3880..3741438373 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -2915,6 +2915,47 @@ TEST(Infer, ModelWith2DInputs) #endif // HAVE_NGRAPH +TEST(TestAgeGender, ThrowBlobAndInputPrecisionMismatchStreaming) +{ + const std::string device = "MYRIAD"; + skipIfDeviceNotAvailable(device); + + initDLDTDataPath(); + + cv::gapi::ie::detail::ParamDesc params; + // NB: Precision for inputs is U8. + params.model_path = compileAgeGenderBlob(device); + params.device_id = device; + + // Configure & run G-API + using AGInfo = std::tuple; + G_API_NET(AgeGender, , "test-age-gender"); + + auto pp = cv::gapi::ie::Params { + params.model_path, params.device_id + }.cfgOutputLayers({ "age_conv3", "prob" }); + + cv::GMat in, age, gender; + std::tie(age, gender) = cv::gapi::infer(in); + auto pipeline = cv::GComputation(cv::GIn(in), cv::GOut(age, gender)) + .compileStreaming(cv::compile_args(cv::gapi::networks(pp))); + + cv::Mat in_mat(320, 240, CV_32FC3); + cv::randu(in_mat, 0, 1); + cv::Mat gapi_age, gapi_gender; + + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + // NB: Blob precision is U8, but user pass FP32 data, so exception will be thrown. + // Now exception comes directly from IE, but since G-API has information + // about data precision at the compile stage, consider the possibility of + // throwing exception from there. + for (int i = 0; i < 10; ++i) { + EXPECT_ANY_THROW(pipeline.pull(cv::gout(gapi_age, gapi_gender))); + } +} + } // namespace opencv_test #endif // HAVE_INF_ENGINE diff --git a/modules/gapi/test/internal/gapi_int_executor_tests.cpp b/modules/gapi/test/internal/gapi_int_executor_tests.cpp index 4745213909..b8f0e18e0b 100644 --- a/modules/gapi/test/internal/gapi_int_executor_tests.cpp +++ b/modules/gapi/test/internal/gapi_int_executor_tests.cpp @@ -2,12 +2,14 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation #include "../test_precomp.hpp" #include "../gapi_mock_kernels.hpp" +#include + namespace opencv_test { @@ -294,6 +296,28 @@ TEST_F(GExecutorReshapeTest, ReshapeCallAllocate) EXPECT_EQ(1, island1.getReshapeCounter()); } +TEST_F(GExecutorReshapeTest, CPUBackendIsReshapable) +{ + comp = cv::GComputation([](){ + cv::GMat in; + cv::GMat foo = I::Foo::on(in); + cv::GMat out = cv::gapi::bitwise_not(cv::gapi::bitwise_not(in)); + return cv::GComputation(cv::GIn(in), cv::GOut(foo, out)); + }); + // NB: Initial state + EXPECT_EQ(0, island1.getReshapeCounter()); + + // NB: First compilation. + cv::Mat out_mat2; + comp.apply(cv::gin(in_mat1), cv::gout(out_mat, out_mat2), cv::compile_args(pkg)); + EXPECT_EQ(0, island1.getReshapeCounter()); + + // NB: The entire graph is reshapable, so it won't be recompiled, but reshaped. + comp.apply(cv::gin(in_mat2), cv::gout(out_mat, out_mat2), cv::compile_args(pkg)); + EXPECT_EQ(1, island1.getReshapeCounter()); + EXPECT_EQ(0, cvtest::norm(out_mat2, in_mat2, NORM_INF)); +} + // FIXME: Add explicit tests on GMat/GScalar/GArray being connectors // between executed islands diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index 4d33d4b0c5..ffa1d452c1 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -304,6 +304,66 @@ void checkPullOverload(const cv::Mat& ref, EXPECT_EQ(0., cv::norm(ref, out_mat, cv::NORM_INF)); } +class InvalidSource : public cv::gapi::wip::IStreamSource { +public: + InvalidSource(const size_t throw_every_nth_frame, + const size_t num_frames) + : m_throw_every_nth_frame(throw_every_nth_frame), + m_curr_frame_id(0u), + m_num_frames(num_frames), + m_mat(1, 1, CV_8U) { + } + + static std::string exception_msg() + { + return "InvalidSource sucessfuly failed!"; + } + + bool pull(cv::gapi::wip::Data& d) { + ++m_curr_frame_id; + if (m_curr_frame_id > m_num_frames) { + return false; + } + + if (m_curr_frame_id % m_throw_every_nth_frame == 0) { + throw std::logic_error(InvalidSource::exception_msg()); + return true; + } else { + d = cv::Mat(m_mat); + } + + return true; + } + + cv::GMetaArg descr_of() const override { + return cv::GMetaArg{cv::descr_of(m_mat)}; + } + +private: + size_t m_throw_every_nth_frame; + size_t m_curr_frame_id; + size_t m_num_frames; + cv::Mat m_mat; +}; + +G_TYPED_KERNEL(GThrowExceptionOp, , "org.opencv.test.throw_error_op") +{ + static GMatDesc outMeta(GMatDesc in) { return in; } +}; + +GAPI_OCV_KERNEL(GThrowExceptionKernel, GThrowExceptionOp) +{ + static std::string exception_msg() + { + return "GThrowExceptionKernel sucessfuly failed"; + } + + static void run(const cv::Mat&, cv::Mat&) + { + throw std::logic_error(GThrowExceptionKernel::exception_msg()); + } +}; + } // anonymous namespace TEST_P(GAPI_Streaming, SmokeTest_ConstInput_GMat) @@ -2512,5 +2572,109 @@ TEST(GAPI_Streaming, TestDesyncMediaFrameGray) { } } +TEST(GAPI_Streaming_Exception, SingleKernelThrow) { + cv::GMat in; + auto pipeline = cv::GComputation(in, GThrowExceptionOp::on(in)) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, StreamingBackendExceptionAsInput) { + cv::GMat in; + auto pipeline = cv::GComputation(in, + cv::gapi::copy(GThrowExceptionOp::on(in))) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, RegularBacckendsExceptionAsInput) { + cv::GMat in; + auto pipeline = cv::GComputation(in, + cv::gapi::add(GThrowExceptionOp::on(in), GThrowExceptionOp::on(in))) + .compileStreaming(cv::compile_args(cv::gapi::kernels())); + + cv::Mat in_mat(cv::Size(300, 300), CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); + pipeline.setSource(cv::gin(in_mat)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(GThrowExceptionKernel::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, SourceThrow) { + cv::GMat in; + auto pipeline = cv::GComputation(in, cv::gapi::copy(in)).compileStreaming(); + + pipeline.setSource(std::make_shared(1u, 1u)); + pipeline.start(); + + EXPECT_THROW( + try { + cv::Mat out_mat; + pipeline.pull(cv::gout(out_mat)); + } catch (const std::logic_error& e) { + EXPECT_EQ(InvalidSource::exception_msg(), e.what()); + throw; + }, std::logic_error); +} + +TEST(GAPI_Streaming_Exception, SourceThrowEverySecondFrame) { + constexpr size_t throw_every_nth_frame = 2u; + constexpr size_t num_frames = 10u; + size_t curr_frame = 0; + bool has_frame = true; + cv::Mat out_mat; + + cv::GMat in; + auto pipeline = cv::GComputation(in, cv::gapi::copy(in)).compileStreaming(); + + pipeline.setSource(std::make_shared(throw_every_nth_frame, num_frames)); + pipeline.start(); + while (has_frame) { + ++curr_frame; + try { + has_frame = pipeline.pull(cv::gout(out_mat)); + } catch (const std::exception& e) { + EXPECT_TRUE(curr_frame % throw_every_nth_frame == 0); + EXPECT_EQ(InvalidSource::exception_msg(), e.what()); + } + } + + // NB: Pull was called num_frames + 1(stop). + EXPECT_EQ(num_frames, curr_frame - 1); +} } // namespace opencv_test diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp index 51fb9f276a..59723dbd5e 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_core_test.cpp @@ -68,7 +68,7 @@ struct EmptyDataProvider : public cv::gapi::wip::onevpl::IDataProvider { struct TestProcessingSession : public cv::gapi::wip::onevpl::EngineSession { TestProcessingSession(mfxSession mfx_session) : - EngineSession(mfx_session, {}) { + EngineSession(mfx_session) { } const mfxFrameInfo& get_video_param() const override { @@ -319,7 +319,8 @@ TEST(OneVPL_Source_CPU_FrameAdapter, InitFrameAdapter) EXPECT_EQ(0, surf->get_locks_count()); { - VPLMediaFrameCPUAdapter adapter(surf); + mfxSession stub_session = reinterpret_cast(0x1); + VPLMediaFrameCPUAdapter adapter(surf, stub_session); EXPECT_EQ(1, surf->get_locks_count()); } EXPECT_EQ(0, surf->get_locks_count()); @@ -528,9 +529,9 @@ TEST(OneVPL_Source_DX11_Accel, Init) cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); VPLDX11AccelerationPolicy accel(std::make_shared(cfg_params_w_dx11)); - mfxLoader mfx_handle = MFXLoad(); + mfxLoader test_mfx_handle = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -538,7 +539,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), mfx_param_0), MFX_ERR_NONE); - mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_1); mfxVariant mfx_param_1; mfx_param_1.Type = MFX_VARIANT_TYPE_U32; @@ -546,7 +547,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), mfx_param_1), MFX_ERR_NONE); - mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_2); mfxVariant mfx_param_2; mfx_param_2.Type = MFX_VARIANT_TYPE_U32; @@ -556,7 +557,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) // create session mfxSession mfx_session{}; - mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // assign acceleration @@ -600,7 +601,7 @@ TEST(OneVPL_Source_DX11_Accel, Init) EXPECT_NO_THROW(accel.deinit(mfx_session)); MFXClose(mfx_session); - MFXUnload(mfx_handle); + MFXUnload(test_mfx_handle); } TEST(OneVPL_Source_DX11_Accel_VPL, Init) @@ -611,9 +612,9 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); std::unique_ptr acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); - mfxLoader mfx_handle = MFXLoad(); + mfxLoader test_mfx_handle = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -621,7 +622,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), mfx_param_0), MFX_ERR_NONE); - mfxConfig cfg_inst_1 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_1); mfxVariant mfx_param_1; mfx_param_1.Type = MFX_VARIANT_TYPE_U32; @@ -629,7 +630,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), mfx_param_1), MFX_ERR_NONE); - mfxConfig cfg_inst_2 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_2); mfxVariant mfx_param_2; mfx_param_2.Type = MFX_VARIANT_TYPE_U32; @@ -637,7 +638,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), mfx_param_2), MFX_ERR_NONE); - mfxConfig cfg_inst_3 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_3 = MFXCreateConfig(test_mfx_handle); EXPECT_TRUE(cfg_inst_3); mfxVariant mfx_param_3; mfx_param_3.Type = MFX_VARIANT_TYPE_U32; @@ -647,7 +648,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) mfx_param_3), MFX_ERR_NONE); // create session mfxSession mfx_session{}; - mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // assign acceleration @@ -732,7 +733,7 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) sess_ptr->init_transcode_surface_pool(vpp_out_pool_key); // prepare working surfaces - sess_ptr->swap_surface(engine); + sess_ptr->swap_decode_surface(engine); sess_ptr->swap_transcode_surface(engine); // launch pipeline @@ -756,11 +757,8 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) { my_sess.last_status = MFXVideoDECODE_DecodeFrameAsync(my_sess.session, - (my_sess.data_provider || (my_sess.stream && my_sess.stream->DataLength)) - ? my_sess.stream.get() - - : nullptr, /* No more data to read, start decode draining mode*/ - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -771,12 +769,12 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) my_sess.last_status == MFX_WRN_DEVICE_BUSY) { try { if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { - my_sess.swap_surface(engine); + my_sess.swap_decode_surface(engine); } my_sess.last_status = MFXVideoDECODE_DecodeFrameAsync(my_sess.session, - my_sess.stream.get(), - my_sess.procesing_surface_ptr.lock()->get_handle(), + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), &sync_pair.second, &sync_pair.first); @@ -808,6 +806,224 @@ TEST(OneVPL_Source_DX11_Accel_VPL, Init) } } } + +TEST(OneVPL_Source_DX11_Accel_VPL, preproc) +{ + using namespace cv::gapi::wip::onevpl; + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + std::unique_ptr acceleration_policy (new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + mfxLoader test_mfx_handle = MFXLoad(); + + mfxConfig cfg_inst_0 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_0); + mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = MFX_ACCEL_MODE_VIA_D3D11; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = MFX_CODEC_HEVC; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), + mfx_param_2), MFX_ERR_NONE); + + mfxConfig cfg_inst_3 = MFXCreateConfig(test_mfx_handle); + EXPECT_TRUE(cfg_inst_3); + mfxVariant mfx_param_3; + mfx_param_3.Type = MFX_VARIANT_TYPE_U32; + mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + mfx_param_3), MFX_ERR_NONE); + // create session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(test_mfx_handle, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // assign acceleration + EXPECT_NO_THROW(acceleration_policy->init(mfx_decode_session)); + + // create proper bitstream + std::string file_path = findDataFile("highgui/video/big_buck_bunny.h265"); + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(MFX_CODEC_HEVC)})); + IDataProvider::mfx_codec_id_type decoder_id_name = data_provider->get_mfx_codec_id(); + + // Prepare video param + mfxVideoParam mfxDecParams {}; + mfxDecParams.mfx.CodecId = decoder_id_name; + mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + // try fetch & decode input data + sts = MFX_ERR_NONE; + std::shared_ptr bitstream{}; + do { + EXPECT_TRUE(data_provider->fetch_bitstream_data(bitstream)); + sts = MFXVideoDECODE_DecodeHeader(mfx_decode_session, bitstream.get(), &mfxDecParams); + EXPECT_TRUE(MFX_ERR_NONE == sts || MFX_ERR_MORE_DATA == sts); + } while (sts == MFX_ERR_MORE_DATA && !data_provider->empty()); + + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxFrameAllocRequest request{}; + memset(&request, 0, sizeof(request)); + sts = MFXVideoDECODE_QueryIOSurf(mfx_decode_session, &mfxDecParams, &request); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // Allocate surfaces for decoder + request.Type |= MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE | MFX_MEMTYPE_FROM_VPPIN; + VPLAccelerationPolicy::pool_key_t decode_pool_key = acceleration_policy->create_surface_pool(request, + mfxDecParams.mfx.FrameInfo); + sts = MFXVideoDECODE_Init(mfx_decode_session, &mfxDecParams); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // initialize VPL session + mfxSession mfx_vpl_session{}; + sts = MFXCreateSession(test_mfx_handle, 0, &mfx_vpl_session); + // assign acceleration + EXPECT_NO_THROW(acceleration_policy->init(mfx_vpl_session)); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // request VPL surface + mfxU16 vppOutImgWidth = 672; + mfxU16 vppOutImgHeight = 382; + + mfxVideoParam mfxVPPParams{0}; + mfxVPPParams.vpp.In = mfxDecParams.mfx.FrameInfo; + + mfxVPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; + mfxVPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; + mfxVPPParams.vpp.Out.Width = ALIGN16(vppOutImgWidth); + mfxVPPParams.vpp.Out.Height = ALIGN16(vppOutImgHeight); + mfxVPPParams.vpp.Out.CropX = 0; + mfxVPPParams.vpp.Out.CropY = 0; + mfxVPPParams.vpp.Out.CropW = vppOutImgWidth; + mfxVPPParams.vpp.Out.CropH = vppOutImgHeight; + mfxVPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; + mfxVPPParams.vpp.Out.FrameRateExtN = 30; + mfxVPPParams.vpp.Out.FrameRateExtD = 1; + + mfxVPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; + + mfxFrameAllocRequest vppRequests[2]; + memset(&vppRequests, 0, sizeof(mfxFrameAllocRequest) * 2); + EXPECT_EQ(MFXVideoVPP_QueryIOSurf(mfx_vpl_session, &mfxVPPParams, vppRequests), MFX_ERR_NONE); + + vppRequests[1].AllocId = 666; + VPLAccelerationPolicy::pool_key_t vpp_out_pool_key = + acceleration_policy->create_surface_pool(vppRequests[1], mfxVPPParams.vpp.Out); + EXPECT_EQ(MFXVideoVPP_Init(mfx_vpl_session, &mfxVPPParams), MFX_ERR_NONE); + + // finalize session creation + DecoderParams d_param{bitstream, mfxDecParams}; + TranscoderParams t_param{mfxVPPParams}; + VPLLegacyDecodeEngine engine(std::move(acceleration_policy)); + std::shared_ptr sess_ptr = + engine.register_session( + mfx_decode_session, + std::move(d_param), + data_provider); + + sess_ptr->init_surface_pool(decode_pool_key); + + // prepare working surfaces + sess_ptr->swap_decode_surface(engine); + + // launch pipeline + LegacyDecodeSession &my_sess = *sess_ptr; + + size_t min_available_frames_count = + std::min(engine.get_accel()->get_surface_count(decode_pool_key), + engine.get_accel()->get_surface_count(vpp_out_pool_key)); + size_t frame_num = 0; + do { + if (!my_sess.data_provider) { + my_sess.last_status = MFX_ERR_MORE_DATA; + } else { + my_sess.last_status = MFX_ERR_NONE; + if (!my_sess.data_provider->fetch_bitstream_data(my_sess.stream)) { + my_sess.last_status = MFX_ERR_MORE_DATA; + my_sess.data_provider.reset(); //close source + } + } + + // 2) enqueue ASYNC decode operation + // prepare sync object for new surface + LegacyTranscodeSession::op_handle_t sync_pair{}; + + // enqueue decode operation with current session surface + { + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + // process wait-like statuses in-place: + // It had better to use up all VPL decoding resources in pipeline + // as soon as possible. So waiting more free-surface or device free + while (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_WRN_DEVICE_BUSY) { + try { + if (my_sess.last_status == MFX_ERR_MORE_SURFACE) { + my_sess.swap_decode_surface(engine); + } + my_sess.last_status = + MFXVideoDECODE_DecodeFrameAsync(my_sess.session, + my_sess.get_mfx_bitstream_ptr(), + my_sess.processing_surface_ptr.lock()->get_handle(), + &sync_pair.second, + &sync_pair.first); + + } catch (const std::runtime_error&) { + // NB: not an error, yield CPU ticks to check + // surface availability at a next phase. + EXPECT_TRUE(false); + } + } + } + { + do { + my_sess.last_status = MFXVideoCORE_SyncOperation(my_sess.session, sync_pair.first, 0); + // put frames in ready queue on success + if (MFX_ERR_NONE == my_sess.last_status) { + break; + } + } while (MFX_WRN_IN_EXECUTION == my_sess.last_status); + EXPECT_EQ(my_sess.last_status, MFX_ERR_NONE); + } + + // perform VPP operation on decoder synchronized surface + + auto vpp_out = engine.get_accel()->get_free_surface(vpp_out_pool_key).lock(); + EXPECT_TRUE(vpp_out.get()); + my_sess.last_status = MFXVideoVPP_RunFrameVPPAsync(mfx_vpl_session, + sync_pair.second, + vpp_out->get_handle(), + nullptr, &sync_pair.first); + if (my_sess.last_status == MFX_ERR_MORE_SURFACE || + my_sess.last_status == MFX_ERR_NONE) { + my_sess.last_status = MFXVideoCORE_SyncOperation(mfx_vpl_session, sync_pair.first, INFINITE); + EXPECT_EQ(my_sess.last_status, MFX_ERR_NONE); + frame_num++; + } + } while(frame_num < min_available_frames_count); +} #endif // HAVE_DIRECTX #endif // HAVE_D3D11 diff --git a/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp b/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp index c8c27fa6a4..ebafb79695 100644 --- a/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_vpl_data_provider.cpp @@ -73,9 +73,9 @@ TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) EXPECT_TRUE(dd_result); // initialize MFX - mfxLoader mfx_handle = MFXLoad(); + mfxLoader mfx = MFXLoad(); - mfxConfig cfg_inst_0 = MFXCreateConfig(mfx_handle); + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx); EXPECT_TRUE(cfg_inst_0); mfxVariant mfx_param_0; mfx_param_0.Type = MFX_VARIANT_TYPE_U32; @@ -85,7 +85,7 @@ TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) // create MFX session mfxSession mfx_session{}; - mfxStatus sts = MFXCreateSession(mfx_handle, 0, &mfx_session); + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_session); EXPECT_EQ(MFX_ERR_NONE, sts); // create proper bitstream @@ -112,7 +112,7 @@ TEST_P(OneVPL_Source_MFPAsyncDispatcherTest, open_and_decode_file) MFXVideoDECODE_Close(mfx_session); MFXClose(mfx_session); - MFXUnload(mfx_handle); + MFXUnload(mfx); } diff --git a/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp new file mode 100644 index 0000000000..f33d967c89 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_vpp_preproc_test.cpp @@ -0,0 +1,710 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2022 Intel Corporation + + +#include "../test_precomp.hpp" + +#include "../common/gapi_tests_common.hpp" +#include "../common/gapi_streaming_tests_common.hpp" + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#ifdef HAVE_ONEVPL + +#include +#include "streaming/onevpl/file_data_provider.hpp" +#include "streaming/onevpl/cfg_param_device_selector.hpp" + +#include "streaming/onevpl/accelerators/surface/surface.hpp" +#include "streaming/onevpl/accelerators/surface/cpu_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/surface/dx11_frame_adapter.hpp" +#include "streaming/onevpl/accelerators/accel_policy_cpu.hpp" +#include "streaming/onevpl/accelerators/accel_policy_dx11.hpp" +#include "streaming/onevpl/accelerators/dx11_alloc_resource.hpp" +#include "streaming/onevpl/accelerators/utils/shared_lock.hpp" +#define private public +#define protected public +#include "streaming/onevpl/engine/decode/decode_engine_legacy.hpp" +#include "streaming/onevpl/engine/decode/decode_session.hpp" + +#include "streaming/onevpl/engine/preproc/preproc_engine.hpp" +#include "streaming/onevpl/engine/preproc/preproc_session.hpp" +#include "streaming/onevpl/engine/preproc/preproc_dispatcher.hpp" + +#include "streaming/onevpl/engine/transcode/transcode_engine_legacy.hpp" +#include "streaming/onevpl/engine/transcode/transcode_session.hpp" +#undef protected +#undef private +#include "logger.hpp" + +#define ALIGN16(value) (((value + 15) >> 4) << 4) + +namespace opencv_test +{ +namespace +{ +template +cv::MediaFrame extract_decoded_frame(mfxSession sessId, ProcessingEngine& engine) { + using namespace cv::gapi::wip::onevpl; + ProcessingEngineBase::ExecutionStatus status = ProcessingEngineBase::ExecutionStatus::Continue; + while (0 == engine.get_ready_frames_count() && + status == ProcessingEngineBase::ExecutionStatus::Continue) { + status = engine.process(sessId); + } + + if (engine.get_ready_frames_count() == 0) { + GAPI_LOG_WARNING(nullptr, "failed: cannot obtain preprocessed frames, last status: " << + ProcessingEngineBase::status_to_string(status)); + throw std::runtime_error("cannot finalize VPP preprocessing operation"); + } + cv::gapi::wip::Data data; + engine.get_frame(data); + return cv::util::get(data); +} + +std::tuple prepare_mfx(int mfx_codec, int mfx_accel_mode) { + using namespace cv::gapi::wip::onevpl; + mfxLoader mfx = MFXLoad(); + mfxConfig cfg_inst_0 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_0); + mfxVariant mfx_param_0; + mfx_param_0.Type = MFX_VARIANT_TYPE_U32; + mfx_param_0.Data.U32 = MFX_IMPL_TYPE_HARDWARE; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_0,(mfxU8 *)CfgParam::implementation_name(), + mfx_param_0), MFX_ERR_NONE); + + mfxConfig cfg_inst_1 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_1); + mfxVariant mfx_param_1; + mfx_param_1.Type = MFX_VARIANT_TYPE_U32; + mfx_param_1.Data.U32 = mfx_accel_mode; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_1,(mfxU8 *)CfgParam::acceleration_mode_name(), + mfx_param_1), MFX_ERR_NONE); + + mfxConfig cfg_inst_2 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_2); + mfxVariant mfx_param_2; + mfx_param_2.Type = MFX_VARIANT_TYPE_U32; + mfx_param_2.Data.U32 = mfx_codec; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_2,(mfxU8 *)CfgParam::decoder_id_name(), + mfx_param_2), MFX_ERR_NONE); + + mfxConfig cfg_inst_3 = MFXCreateConfig(mfx); + EXPECT_TRUE(cfg_inst_3); + mfxVariant mfx_param_3; + mfx_param_3.Type = MFX_VARIANT_TYPE_U32; + mfx_param_3.Data.U32 = MFX_EXTBUFF_VPP_SCALING; + EXPECT_EQ(MFXSetConfigFilterProperty(cfg_inst_3, + (mfxU8 *)"mfxImplDescription.mfxVPPDescription.filter.FilterFourCC", + mfx_param_3), MFX_ERR_NONE); + return std::make_tuple(mfx, cfg_inst_3); +} + +class SafeQueue { +public: + void push(cv::MediaFrame&& f) { + std::unique_lock lock(mutex); + queue.push(std::move(f)); + cv.notify_all(); + } + + cv::MediaFrame pop() { + cv::MediaFrame ret; + std::unique_lock lock(mutex); + cv.wait(lock, [this] () { + return !queue.empty(); + }); + ret = queue.front(); + queue.pop(); + return ret; + } + + void push_stop() { + push(cv::MediaFrame::Create()); + } + + static bool is_stop(const cv::MediaFrame &f) { + try { + return f.get(); + } catch(...) {} + return false; + } + +private: + struct IStopAdapter final : public cv::MediaFrame::IAdapter { + ~IStopAdapter() {} + cv::GFrameDesc meta() const { return {}; }; + MediaFrame::View access(MediaFrame::Access) { return {{}, {}}; }; + }; +private: + std::condition_variable cv; + std::mutex mutex; + std::queue queue; +}; + +struct EmptyDataProvider : public cv::gapi::wip::onevpl::IDataProvider { + + bool empty() const override { + return true; + } + mfx_codec_id_type get_mfx_codec_id() const override { + return std::numeric_limits::max(); + } + bool fetch_bitstream_data(std::shared_ptr &) override { + return false; + } +}; +} + +using source_t = std::string; +using decoder_t = int; +using acceleration_t = int; +using out_frame_info_t = cv::GFrameDesc; +using preproc_args_t = std::tuple; + +static cv::util::optional empty_roi; + +class VPPPreprocParams : public ::testing::TestWithParam {}; + +preproc_args_t files[] = { + preproc_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + preproc_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}} +}; + +#ifdef HAVE_DIRECTX +#ifdef HAVE_D3D11 +TEST(OneVPL_Source_PreprocEngine, functional_single_thread) +{ + using namespace cv::gapi::wip::onevpl; + using namespace cv::gapi::wip; + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(MFX_ACCEL_MODE_VIA_D3D11)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::string file_path = findDataFile("highgui/video/big_buck_bunny.h265"); + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(MFX_CODEC_HEVC)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engine + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + + // simulate net info + cv::GFrameDesc required_frame_param {cv::MediaFormat::NV12, + {1920, 1080}}; + + // create VPP preproc engine + VPPPreprocEngine preproc_engine(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + + // launch pipeline + // 1) decode frame + cv::MediaFrame first_decoded_frame; + ASSERT_NO_THROW(first_decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); + cv::GFrameDesc first_frame_decoded_desc = first_decoded_frame.desc(); + + // 1.5) create preproc session based on frame description & network info + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // 2) make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + empty_roi); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + ASSERT_FALSE(first_frame_decoded_desc == first_outcome_pp_desc); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // make test in loop + bool in_progress = false; + size_t frames_processed_count = 1; + const auto &first_pp_param_value_impl = + cv::util::get(first_pp_params.value().value); + try { + while(true) { + cv::MediaFrame decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine); + in_progress = true; + ASSERT_EQ(decoded_frame.desc(), first_frame_decoded_desc); + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + const auto &cur_pp_param_value_impl = + cv::util::get(params.value().value); + + ASSERT_EQ(first_pp_param_value_impl.handle, cur_pp_param_value_impl.handle); + ASSERT_TRUE(FrameInfoComparator::equal_to(first_pp_param_value_impl.info, cur_pp_param_value_impl.info)); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + ASSERT_EQ(pp_sess.get().handle.get(), + first_pp_sess.get().handle.get()); + + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, + decoded_frame, + empty_roi); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + frames_processed_count++; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(frames_processed_count, 1); +} + +void decode_function(cv::gapi::wip::onevpl::VPLLegacyDecodeEngine &decode_engine, + cv::gapi::wip::onevpl::ProcessingEngineBase::session_ptr sess_ptr, + SafeQueue &queue, size_t &decoded_number) { + // decode first frame + { + cv::MediaFrame decoded_frame; + ASSERT_NO_THROW(decoded_frame = extract_decoded_frame(sess_ptr->session, decode_engine)); + queue.push(std::move(decoded_frame)); + } + + // launch pipeline + try { + while(true) { + queue.push(extract_decoded_frame(sess_ptr->session, decode_engine)); + decoded_number++; + } + } catch (...) {} + + // send stop + queue.push_stop(); +} + +void preproc_function(cv::gapi::wip::IPreprocEngine &preproc_engine, SafeQueue&queue, + size_t &preproc_number, const out_frame_info_t &required_frame_param, + const cv::util::optional &roi_rect = {}) { + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + // create preproc session based on frame description & network info + cv::MediaFrame first_decoded_frame = queue.pop(); + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = + preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + roi_rect); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // launch pipeline + bool in_progress = false; + // let's allow counting of preprocessed frames to check this value later: + // Currently, it looks redundant to implement any kind of graceful shutdown logic + // in this test - so let's apply agreement that media source is processed + // successfully when preproc_number != 1 in result. + // Specific validation logic which adhere to explicit counter value may be implemented + // in particular test scope + preproc_number = 1; + try { + while(true) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + break; + } + in_progress = true; + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + const auto &vpp_params = params.value().get(); + const auto &first_vpp_params = first_pp_params.value().get(); + ASSERT_EQ(vpp_params.handle, first_vpp_params.handle); + ASSERT_TRUE(0 == memcmp(&vpp_params.info, &first_vpp_params.info, sizeof(mfxFrameInfo))); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + ASSERT_EQ(pp_sess.get().handle.get(), + first_pp_sess.get().handle.get()); + + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, empty_roi); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + preproc_number++; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(preproc_number, 1); +} + +void multi_source_preproc_function(size_t source_num, + cv::gapi::wip::IPreprocEngine &preproc_engine, SafeQueue&queue, + size_t &preproc_number, const out_frame_info_t &required_frame_param, + const cv::util::optional &roi_rect = {}) { + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + // create preproc session based on frame description & network info + cv::MediaFrame first_decoded_frame = queue.pop(); + cv::util::optional first_pp_params = preproc_engine.is_applicable(first_decoded_frame); + ASSERT_TRUE(first_pp_params.has_value()); + pp_session first_pp_sess = + preproc_engine.initialize_preproc(first_pp_params.value(), + required_frame_param); + + // make preproc using incoming decoded frame & preproc session + cv::MediaFrame first_pp_frame = preproc_engine.run_sync(first_pp_sess, + first_decoded_frame, + roi_rect); + cv::GFrameDesc first_outcome_pp_desc = first_pp_frame.desc(); + + // do not hold media frames because they share limited DX11 surface pool resources + first_decoded_frame = cv::MediaFrame(); + first_pp_frame = cv::MediaFrame(); + + // launch pipeline + bool in_progress = false; + preproc_number = 1; + size_t received_stop_count = 0; + try { + while(received_stop_count != source_num) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + ++received_stop_count; + continue; + } + in_progress = true; + + cv::util::optional params = preproc_engine.is_applicable(decoded_frame); + ASSERT_TRUE(params.has_value()); + + pp_session pp_sess = preproc_engine.initialize_preproc(params.value(), + required_frame_param); + cv::MediaFrame pp_frame = preproc_engine.run_sync(pp_sess, decoded_frame, empty_roi); + cv::GFrameDesc pp_desc = pp_frame.desc(); + ASSERT_TRUE(pp_desc == first_outcome_pp_desc); + in_progress = false; + decoded_frame = cv::MediaFrame(); + preproc_number++; + } + } catch (const std::exception& ex) { + GAPI_LOG_WARNING(nullptr, "Caught exception in preproc worker: " << ex.what()); + } + + // test if interruption has happened + if (in_progress) { + while (true) { + cv::MediaFrame decoded_frame = queue.pop(); + if (SafeQueue::is_stop(decoded_frame)) { + break; + } + } + } + ASSERT_FALSE(in_progress); + ASSERT_NE(preproc_number, 1); +} +using roi_t = cv::util::optional; +using preproc_roi_args_t = decltype(std::tuple_cat(std::declval(), + std::declval>())); +class VPPPreprocROIParams : public ::testing::TestWithParam {}; +TEST_P(VPPPreprocROIParams, functional_roi_different_threads) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel; + out_frame_info_t required_frame_param; + roi_t opt_roi; + std::tie(file_path, decoder_id, accel, required_frame_param, opt_roi) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engine + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + + // create VPP preproc engine + VPPPreprocEngine preproc_engine(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + + // launch threads + SafeQueue queue; + size_t decoded_number = 1; + size_t preproc_number = 0; + + std::thread decode_thread(decode_function, std::ref(decode_engine), sess_ptr, + std::ref(queue), std::ref(decoded_number)); + std::thread preproc_thread(preproc_function, std::ref(preproc_engine), + std::ref(queue), std::ref(preproc_number), + std::cref(required_frame_param), + std::cref(opt_roi)); + + decode_thread.join(); + preproc_thread.join(); + ASSERT_EQ(preproc_number, decoded_number); +} + +preproc_roi_args_t files_w_roi[] = { + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{0,0,50,50}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{0,0,100,100}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h264", + MFX_CODEC_AVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1080}}}, + roi_t{cv::Rect{100,100,200,200}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{cv::Rect{0,0,100,100}}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{}}, + preproc_roi_args_t {"highgui/video/big_buck_bunny.h265", + MFX_CODEC_HEVC, MFX_ACCEL_MODE_VIA_D3D11, + out_frame_info_t{cv::GFrameDesc {cv::MediaFormat::NV12, {1920, 1280}}}, + roi_t{cv::Rect{100,100,200,200}}} +}; + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocEngineROI, VPPPreprocROIParams, + testing::ValuesIn(files_w_roi)); + + +using VPPInnerPreprocParams = VPPPreprocParams; +TEST_P(VPPInnerPreprocParams, functional_inner_preproc_size) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel; + out_frame_info_t required_frame_param; + std::tie(file_path, decoder_id, accel, required_frame_param) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11_vpp; + + // create accel policy + cfg_params_w_dx11_vpp.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11_vpp))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + // create decode session + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // fill vpp params beforehand: resolution + cfg_params_w_dx11_vpp.push_back(CfgParam::create_vpp_out_width( + static_cast(required_frame_param.size.width))); + cfg_params_w_dx11_vpp.push_back(CfgParam::create_vpp_out_height( + static_cast(required_frame_param.size.height))); + + // create transcode engine + auto device_selector = accel_policy->get_device_selector(); + VPLLegacyTranscodeEngine engine(std::move(accel_policy)); + auto sess_ptr = engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11_vpp, + data_provider); + // make test in loop + bool in_progress = false; + size_t frames_processed_count = 1; + try { + while(true) { + cv::MediaFrame decoded_frame = extract_decoded_frame(sess_ptr->session, engine); + in_progress = true; + ASSERT_EQ(decoded_frame.desc().size.width, + ALIGN16(required_frame_param.size.width)); + ASSERT_EQ(decoded_frame.desc().size.height, + ALIGN16(required_frame_param.size.height)); + ASSERT_EQ(decoded_frame.desc().fmt, required_frame_param.fmt); + frames_processed_count++; + in_progress = false; + } + } catch (...) {} + + // test if interruption has happened + ASSERT_FALSE(in_progress); + ASSERT_NE(frames_processed_count, 1); +} + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocInner, VPPInnerPreprocParams, + testing::ValuesIn(files)); + +// Dispatcher test suite +class VPPPreprocDispatcherROIParams : public ::testing::TestWithParam {}; +TEST_P(VPPPreprocDispatcherROIParams, functional_roi_different_threads) +{ + using namespace cv::gapi::wip; + using namespace cv::gapi::wip::onevpl; + source_t file_path; + decoder_t decoder_id; + acceleration_t accel = MFX_ACCEL_MODE_VIA_D3D11; + out_frame_info_t required_frame_param; + roi_t opt_roi; + std::tie(file_path, decoder_id, std::ignore, required_frame_param, opt_roi) = GetParam(); + + file_path = findDataFile(file_path); + + std::vector cfg_params_w_dx11; + cfg_params_w_dx11.push_back(CfgParam::create_acceleration_mode(accel)); + std::unique_ptr decode_accel_policy ( + new VPLDX11AccelerationPolicy(std::make_shared(cfg_params_w_dx11))); + + // create file data provider + std::shared_ptr data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + std::shared_ptr cpu_data_provider(new FileDataProvider(file_path, + {CfgParam::create_decoder_id(decoder_id)})); + + mfxLoader mfx{}; + mfxConfig mfx_cfg{}; + std::tie(mfx, mfx_cfg) = prepare_mfx(decoder_id, accel); + + // create decode session + mfxSession mfx_decode_session{}; + mfxStatus sts = MFXCreateSession(mfx, 0, &mfx_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + mfxSession mfx_cpu_decode_session{}; + sts = MFXCreateSession(mfx, 0, &mfx_cpu_decode_session); + EXPECT_EQ(MFX_ERR_NONE, sts); + + // create decode engines + auto device_selector = decode_accel_policy->get_device_selector(); + VPLLegacyDecodeEngine decode_engine(std::move(decode_accel_policy)); + auto sess_ptr = decode_engine.initialize_session(mfx_decode_session, + cfg_params_w_dx11, + data_provider); + std::vector cfg_params_cpu; + auto cpu_device_selector = std::make_shared(cfg_params_cpu); + VPLLegacyDecodeEngine cpu_decode_engine(std::unique_ptr{ + new VPLCPUAccelerationPolicy(cpu_device_selector)}); + auto cpu_sess_ptr = cpu_decode_engine.initialize_session(mfx_cpu_decode_session, + cfg_params_cpu, + cpu_data_provider); + + // create VPP preproc engines + VPPPreprocDispatcher preproc_dispatcher; + preproc_dispatcher.insert_worker(std::unique_ptr{ + new VPLDX11AccelerationPolicy(device_selector)}); + preproc_dispatcher.insert_worker(std::unique_ptr{ + new VPLCPUAccelerationPolicy(cpu_device_selector)}); + + // launch threads + SafeQueue queue; + size_t decoded_number = 1; + size_t cpu_decoded_number = 1; + size_t preproc_number = 0; + + std::thread decode_thread(decode_function, std::ref(decode_engine), sess_ptr, + std::ref(queue), std::ref(decoded_number)); + std::thread cpu_decode_thread(decode_function, std::ref(cpu_decode_engine), cpu_sess_ptr, + std::ref(queue), std::ref(cpu_decoded_number)); + std::thread preproc_thread(multi_source_preproc_function, + preproc_dispatcher.size(), + std::ref(preproc_dispatcher), + std::ref(queue), std::ref(preproc_number), + std::cref(required_frame_param), + std::cref(opt_roi)); + + decode_thread.join(); + cpu_decode_thread.join(); + preproc_thread.join(); + ASSERT_EQ(preproc_number, decoded_number + cpu_decoded_number); +} + +INSTANTIATE_TEST_CASE_P(OneVPL_Source_PreprocDispatcherROI, VPPPreprocDispatcherROIParams, + testing::ValuesIn(files_w_roi)); + +#endif // HAVE_DIRECTX +#endif // HAVE_D3D11 +} // namespace opencv_test +#endif // HAVE_ONEVPL diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index 148eea71e7..5732532ca9 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -88,8 +88,8 @@ enum ImwriteFlags { IMWRITE_JPEG_PROGRESSIVE = 2, //!< Enable JPEG features, 0 or 1, default is False. IMWRITE_JPEG_OPTIMIZE = 3, //!< Enable JPEG features, 0 or 1, default is False. IMWRITE_JPEG_RST_INTERVAL = 4, //!< JPEG restart interval, 0 - 65535, default is 0 - no restart. - IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is 0 - don't use. - IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is 0 - don't use. + IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is -1 - don't use. + IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is -1 - don't use. IMWRITE_PNG_COMPRESSION = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. If specified, strategy is changed to IMWRITE_PNG_STRATEGY_DEFAULT (Z_DEFAULT_STRATEGY). Default value is 1 (best speed setting). IMWRITE_PNG_STRATEGY = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_RLE. IMWRITE_PNG_BILEVEL = 18, //!< Binary level PNG, 0 or 1, default is 0. diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index 36cf17e1e3..2941284e84 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -325,6 +325,9 @@ bool TiffDecoder::readHeader() result = true; break; } + case 10: + case 12: + case 14: case 16: { CV_Check((int)sample_format, sample_format == SAMPLEFORMAT_UINT || sample_format == SAMPLEFORMAT_INT, ""); @@ -347,7 +350,7 @@ bool TiffDecoder::readHeader() result = true; break; default: - CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 16, 32 or 64."); + CV_Error(cv::Error::StsError, "Invalid bitsperpixel value read from TIFF header! Must be 1, 8, 10, 12, 14, 16, 32 or 64."); } } } @@ -437,6 +440,147 @@ static void fixOrientation(Mat &img, uint16 orientation, int dst_bpp) } } +static void _unpack10To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //5*8b=4*10b : 5 src for 4 dst + constexpr const size_t packedBitsCount = 10; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint64_t u64; + uint8_t u8[8]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u64 & packedBitsMask); + buf.u64 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u64 >> (bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack10To16() + +static void _unpack12To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //3*8b=2*12b : 3 src for 2 dst + constexpr const size_t packedBitsCount = 12; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint32_t u32; + uint8_t u8[4]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u32 & packedBitsMask); + buf.u32 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u32 >> (bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack12To16() + +static void _unpack14To16(const uchar* src, const uchar* srcEnd, ushort* dst, ushort* dstEnd, size_t expectedDstElements) +{ + //7*8b=4*14b : 7 src for 4 dst + constexpr const size_t packedBitsCount = 14; + constexpr const size_t packedBitsMask = ((1<(srcEnd-src)/srcElementsPerPacket), + (static_cast(dstEnd-dst)/dstElementsPerPacket) + }); + union { + uint64_t u64; + uint8_t u8[8]; + } buf = {0}; + for(size_t i = 0 ; i(buf.u64 & packedBitsMask); + buf.u64 >>= packedBitsCount; + } + dst += dstElementsPerPacket; + } + size_t remainingDstElements = std::min( + expectedDstElements-fullPacketsCount*dstElementsPerPacket, + static_cast(dstEnd-dst) + ); + bool stop = !remainingDstElements; + while(!stop) + { + for(size_t j = 0 ; j((buf.u64 >> (bitsPerPacket-(j+1)*packedBitsCount)) & packedBitsMask); + } + }//end while(!stop) +} +//end _unpack14To16() + bool TiffDecoder::readData( Mat& img ) { int type = img.type(); @@ -470,7 +614,7 @@ bool TiffDecoder::readData( Mat& img ) CV_TIFF_CHECK_CALL_DEBUG(TIFFGetField(tif, TIFFTAG_SAMPLESPERPIXEL, &ncn)); uint16 img_orientation = ORIENTATION_TOPLEFT; CV_TIFF_CHECK_CALL_DEBUG(TIFFGetField(tif, TIFFTAG_ORIENTATION, &img_orientation)); - const int bitsPerByte = 8; + constexpr const int bitsPerByte = 8; int dst_bpp = (int)(img.elemSize1() * bitsPerByte); bool vert_flip = dst_bpp == 8 && (img_orientation == ORIENTATION_BOTRIGHT || img_orientation == ORIENTATION_RIGHTBOT || @@ -529,10 +673,15 @@ bool TiffDecoder::readData( Mat& img ) CV_Assert(ncn == img.channels()); CV_TIFF_CHECK_CALL(TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP)); } - const size_t buffer_size = (bpp / bitsPerByte) * ncn * tile_height0 * tile_width0; - AutoBuffer _buffer(buffer_size); - uchar* buffer = _buffer.data(); - ushort* buffer16 = (ushort*)buffer; + const size_t src_buffer_bytes_per_row = divUp(static_cast(ncn * tile_width0 * bpp), static_cast(bitsPerByte)); + const size_t src_buffer_size = tile_height0 * src_buffer_bytes_per_row; + const size_t src_buffer_unpacked_bytes_per_row = divUp(static_cast(ncn * tile_width0 * dst_bpp), static_cast(bitsPerByte)); + const size_t src_buffer_unpacked_size = tile_height0 * src_buffer_unpacked_bytes_per_row; + const bool needsUnpacking = (bpp < dst_bpp); + AutoBuffer _src_buffer(src_buffer_size); + uchar* src_buffer = _src_buffer.data(); + AutoBuffer _src_buffer_unpacked(needsUnpacking ? src_buffer_unpacked_size : 0); + uchar* src_buffer_unpacked = needsUnpacking ? _src_buffer_unpacked.data() : nullptr; int tileidx = 0; for (int y = 0; y < m_height; y += (int)tile_height0) @@ -549,14 +698,14 @@ bool TiffDecoder::readData( Mat& img ) { case 8: { - uchar* bstart = buffer; + uchar* bstart = src_buffer; if (!is_tiled) { - CV_TIFF_CHECK_CALL(TIFFReadRGBAStrip(tif, y, (uint32*)buffer)); + CV_TIFF_CHECK_CALL(TIFFReadRGBAStrip(tif, y, (uint32*)src_buffer)); } else { - CV_TIFF_CHECK_CALL(TIFFReadRGBATile(tif, x, y, (uint32*)buffer)); + CV_TIFF_CHECK_CALL(TIFFReadRGBATile(tif, x, y, (uint32*)src_buffer)); // Tiles fill the buffer from the bottom up bstart += (tile_height0 - tile_height) * tile_width0 * 4; } @@ -594,28 +743,48 @@ bool TiffDecoder::readData( Mat& img ) { if (!is_tiled) { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, (uint32*)buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, (uint32*)src_buffer, src_buffer_size) >= 0); } else { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, (uint32*)buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, (uint32*)src_buffer, src_buffer_size) >= 0); } for (int i = 0; i < tile_height; i++) { + ushort* buffer16 = (ushort*)(src_buffer+i*src_buffer_bytes_per_row); + if (needsUnpacking) + { + const uchar* src_packed = src_buffer+i*src_buffer_bytes_per_row; + uchar* dst_unpacked = src_buffer_unpacked+i*src_buffer_unpacked_bytes_per_row; + if (bpp == 10) + _unpack10To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + else if (bpp == 12) + _unpack12To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + else if (bpp == 14) + _unpack14To16(src_packed, src_packed+src_buffer_bytes_per_row, + (ushort*)dst_unpacked, (ushort*)(dst_unpacked+src_buffer_unpacked_bytes_per_row), + ncn * tile_width0); + buffer16 = (ushort*)dst_unpacked; + } + if (color) { if (ncn == 1) { CV_CheckEQ(wanted_channels, 3, ""); - icvCvt_Gray2BGR_16u_C1C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_Gray2BGR_16u_C1C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } else if (ncn == 3) { CV_CheckEQ(wanted_channels, 3, ""); - icvCvt_RGB2BGR_16u_C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_RGB2BGR_16u_C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } @@ -623,14 +792,14 @@ bool TiffDecoder::readData( Mat& img ) { if (wanted_channels == 4) { - icvCvt_BGRA2RGBA_16u_C4R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2RGBA_16u_C4R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1)); } else { CV_CheckEQ(wanted_channels, 3, "TIFF-16bpp: BGR/BGRA images are supported only"); - icvCvt_BGRA2BGR_16u_C4C3R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2BGR_16u_C4C3R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1), 2); } @@ -646,12 +815,12 @@ bool TiffDecoder::readData( Mat& img ) if( ncn == 1 ) { memcpy(img.ptr(img_y + i, x), - buffer16 + i*tile_width0*ncn, + buffer16, tile_width*sizeof(ushort)); } else { - icvCvt_BGRA2Gray_16u_CnC1R(buffer16 + i*tile_width0*ncn, 0, + icvCvt_BGRA2Gray_16u_CnC1R(buffer16, 0, img.ptr(img_y + i, x), 0, Size(tile_width, 1), ncn, 2); } @@ -665,14 +834,14 @@ bool TiffDecoder::readData( Mat& img ) { if( !is_tiled ) { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedStrip(tif, tileidx, src_buffer, src_buffer_size) >= 0); } else { - CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, buffer, buffer_size) >= 0); + CV_TIFF_CHECK_CALL((int)TIFFReadEncodedTile(tif, tileidx, src_buffer, src_buffer_size) >= 0); } - Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), buffer); + Mat m_tile(Size(tile_width0, tile_height0), CV_MAKETYPE((dst_bpp == 32) ? (depth == CV_32S ? CV_32S : CV_32F) : CV_64F, ncn), src_buffer); Rect roi_tile(0, 0, tile_width, tile_height); Rect roi_img(x, img_y, tile_width, tile_height); if (!m_hdr && ncn == 3) @@ -691,6 +860,8 @@ bool TiffDecoder::readData( Mat& img ) } // for x } // for y } + if (bpp < dst_bpp) + img *= (1<<(dst_bpp-bpp)); fixOrientation(img, img_orientation, dst_bpp); } diff --git a/modules/imgcodecs/test/test_tiff.cpp b/modules/imgcodecs/test/test_tiff.cpp index 1c6e4a6b29..28e084d5b0 100644 --- a/modules/imgcodecs/test/test_tiff.cpp +++ b/modules/imgcodecs/test/test_tiff.cpp @@ -117,6 +117,116 @@ TEST(Imgcodecs_Tiff, decode_tile_remainder) // What about 32, 64 bit? } +TEST(Imgcodecs_Tiff, decode_10_12_14) +{ + /* see issue #21700 + */ + const string root = cvtest::TS::ptr()->get_data_path(); + + const double maxDiff = 256;//samples do not have the exact same values because of the tool that created them + cv::Mat tmp; + double diff = 0; + + cv::Mat img8UC1 = imread(root + "readwrite/pattern_8uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC1.empty()); + ASSERT_EQ(img8UC1.type(), CV_8UC1); + + cv::Mat img8UC3 = imread(root + "readwrite/pattern_8uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC3.empty()); + ASSERT_EQ(img8UC3.type(), CV_8UC3); + + cv::Mat img8UC4 = imread(root + "readwrite/pattern_8uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img8UC4.empty()); + ASSERT_EQ(img8UC4.type(), CV_8UC4); + + cv::Mat img16UC1 = imread(root + "readwrite/pattern_16uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC1.empty()); + ASSERT_EQ(img16UC1.type(), CV_16UC1); + ASSERT_EQ(img8UC1.size(), img16UC1.size()); + img8UC1.convertTo(tmp, img16UC1.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img16UC3 = imread(root + "readwrite/pattern_16uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC3.empty()); + ASSERT_EQ(img16UC3.type(), CV_16UC3); + ASSERT_EQ(img8UC3.size(), img16UC3.size()); + img8UC3.convertTo(tmp, img16UC3.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img16UC4 = imread(root + "readwrite/pattern_16uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img16UC4.empty()); + ASSERT_EQ(img16UC4.type(), CV_16UC4); + ASSERT_EQ(img8UC4.size(), img16UC4.size()); + img8UC4.convertTo(tmp, img16UC4.type(), (1U<<(16-8))); + diff = cv::norm(tmp.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC1 = imread(root + "readwrite/pattern_10uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC1.empty()); + ASSERT_EQ(img10UC1.type(), CV_16UC1); + ASSERT_EQ(img10UC1.size(), img16UC1.size()); + diff = cv::norm(img10UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC3 = imread(root + "readwrite/pattern_10uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC3.empty()); + ASSERT_EQ(img10UC3.type(), CV_16UC3); + ASSERT_EQ(img10UC3.size(), img16UC3.size()); + diff = cv::norm(img10UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img10UC4 = imread(root + "readwrite/pattern_10uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img10UC4.empty()); + ASSERT_EQ(img10UC4.type(), CV_16UC4); + ASSERT_EQ(img10UC4.size(), img16UC4.size()); + diff = cv::norm(img10UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC1 = imread(root + "readwrite/pattern_12uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC1.empty()); + ASSERT_EQ(img12UC1.type(), CV_16UC1); + ASSERT_EQ(img12UC1.size(), img16UC1.size()); + diff = cv::norm(img12UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC3 = imread(root + "readwrite/pattern_12uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC3.empty()); + ASSERT_EQ(img12UC3.type(), CV_16UC3); + ASSERT_EQ(img12UC3.size(), img16UC3.size()); + diff = cv::norm(img12UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img12UC4 = imread(root + "readwrite/pattern_12uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img12UC4.empty()); + ASSERT_EQ(img12UC4.type(), CV_16UC4); + ASSERT_EQ(img12UC4.size(), img16UC4.size()); + diff = cv::norm(img12UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC1 = imread(root + "readwrite/pattern_14uc1.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC1.empty()); + ASSERT_EQ(img14UC1.type(), CV_16UC1); + ASSERT_EQ(img14UC1.size(), img16UC1.size()); + diff = cv::norm(img14UC1.reshape(1), img16UC1.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC3 = imread(root + "readwrite/pattern_14uc3.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC3.empty()); + ASSERT_EQ(img14UC3.type(), CV_16UC3); + ASSERT_EQ(img14UC3.size(), img16UC3.size()); + diff = cv::norm(img14UC3.reshape(1), img16UC3.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); + + cv::Mat img14UC4 = imread(root + "readwrite/pattern_14uc4.tif", cv::IMREAD_UNCHANGED); + ASSERT_FALSE(img14UC4.empty()); + ASSERT_EQ(img14UC4.type(), CV_16UC4); + ASSERT_EQ(img14UC4.size(), img16UC4.size()); + diff = cv::norm(img14UC4.reshape(1), img16UC4.reshape(1), cv::NORM_INF); + ASSERT_LE(diff, maxDiff); +} + TEST(Imgcodecs_Tiff, decode_infinite_rowsperstrip) { const uchar sample_data[142] = { diff --git a/modules/imgproc/src/colormap.cpp b/modules/imgproc/src/colormap.cpp index 26371edad6..966e39eaab 100644 --- a/modules/imgproc/src/colormap.cpp +++ b/modules/imgproc/src/colormap.cpp @@ -734,12 +734,57 @@ namespace colormap Mat src = _src.getMat(); if(src.type() != CV_8UC1 && src.type() != CV_8UC3) CV_Error(Error::StsBadArg, "cv::ColorMap only supports source images of type CV_8UC1 or CV_8UC3"); - // Turn into a BGR matrix into its grayscale representation. - if(src.type() == CV_8UC3) - cvtColor(src.clone(), src, COLOR_BGR2GRAY); - cvtColor(src.clone(), src, COLOR_GRAY2BGR); - // Apply the ColorMap. - LUT(src, _lut, _dst); + + CV_CheckEQ(src.dims, 2, "Not supported"); + + CV_Assert(_lut.isContinuous()); + const int lut_type = _lut.type(); + CV_CheckType(lut_type, (lut_type == CV_8UC1) || (lut_type == CV_8UC3), + "Only CV_8UC1 and CV_8UC3 LUT are supported"); + + Mat srcGray; + if (src.channels() == 1) + srcGray = src; + else + cv::cvtColor(src, srcGray, cv::COLOR_BGR2GRAY);//BGR because of historical cv::LUT() usage + + _dst.create(src.size(), lut_type); + Mat dstMat = _dst.getMat(); + + //we do not use cv::LUT() which requires src.channels() == dst.channels() + const int rows = srcGray.rows; + const int cols = srcGray.cols; + const int minimalPixelsPerPacket = 1<<12; + const int rowsPerPacket = std::max(1, minimalPixelsPerPacket/cols); + const int rowsPacketsCount = (rows+rowsPerPacket-1)/rowsPerPacket; + const Range all(0, rows); + + if (lut_type == CV_8UC1) { + typedef unsigned char lut_pixel_t; + const lut_pixel_t* srcLUT = _lut.ptr(0); + auto body = [&, cols](const Range& range) -> void { + for(int row = range.start ; row(row); + lut_pixel_t* dstRow = dstMat.ptr(row); + for(int col = 0 ; col(0); + auto body = [&, cols](const Range& range) -> void { + for(int row = range.start ; row(row); + lut_pixel_t* dstRow = dstMat.ptr(row); + for(int col = 0 ; col=0 && r-2>=0 && img_row_prev_prev[c-1]>0 #define condition_c r-2>=0 && img_row_prev_prev[c]>0 - #define condition_d c+1=0 && img_row_prev_prev[c+1]>0 - #define condition_e c+2=0 && img_row_prev[c-1]>0 + #define condition_d c+1=0 && img_row_prev_prev[c+1]>0 + #define condition_e c+2=0 && img_row_prev_prev[c+2]>0 #define condition_g c-2>=0 && r-1>=0 && img_row_prev[c-2]>0 #define condition_h c-1>=0 && r-1>=0 && img_row_prev[c-1]>0 diff --git a/modules/imgproc/src/intersection.cpp b/modules/imgproc/src/intersection.cpp index 47d3f3f457..b9659f666e 100644 --- a/modules/imgproc/src/intersection.cpp +++ b/modules/imgproc/src/intersection.cpp @@ -51,15 +51,15 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate { CV_INSTRUMENT_REGION(); - // L2 metric - const float samePointEps = std::max(1e-16f, 1e-6f * (float)std::max(rect1.size.area(), rect2.size.area())); - Point2f vec1[4], vec2[4]; Point2f pts1[4], pts2[4]; rect1.points(pts1); rect2.points(pts2); + // L2 metric + float samePointEps = 1e-6f * (float)std::max(rect1.size.area(), rect2.size.area()); + int ret = INTERSECT_FULL; // Specical case of rect1 == rect2 @@ -99,14 +99,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate vec2[i].y = pts2[(i+1)%4].y - pts2[i].y; } + //we adapt the epsilon to the smallest dimension of the rects + for( int i = 0; i < 4; i++ ) + { + samePointEps = std::min(samePointEps, std::sqrt(vec1[i].x*vec1[i].x+vec1[i].y*vec1[i].y)); + samePointEps = std::min(samePointEps, std::sqrt(vec2[i].x*vec2[i].x+vec2[i].y*vec2[i].y)); + } + samePointEps = std::max(1e-16f, samePointEps); + // Line test - test all line combos for intersection for( int i = 0; i < 4; i++ ) { for( int j = 0; j < 4; j++ ) { // Solve for 2x2 Ax=b - float x21 = pts2[j].x - pts1[i].x; - float y21 = pts2[j].y - pts1[i].y; + const float x21 = pts2[j].x - pts1[i].x; + const float y21 = pts2[j].y - pts1[i].y; float vx1 = vec1[i].x; float vy1 = vec1[i].y; @@ -114,10 +122,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate float vx2 = vec2[j].x; float vy2 = vec2[j].y; - float det = vx2*vy1 - vx1*vy2; + float normalizationScale = std::min(vx1*vx1+vy1*vy1, vx2*vx2+vy2*vy2);//sum of squares : this is >= 0 + //normalizationScale is a square, and we usually limit accuracy around 1e-6, so normalizationScale should be rather limited by ((1e-6)^2)=1e-12 + normalizationScale = (normalizationScale < 1e-12f) ? 1.f : 1.f/normalizationScale; - float t1 = (vx2*y21 - vy2*x21) / det; - float t2 = (vx1*y21 - vy1*x21) / det; + vx1 *= normalizationScale; + vy1 *= normalizationScale; + vx2 *= normalizationScale; + vy2 *= normalizationScale; + + const float det = vx2*vy1 - vx1*vy2; + if (std::abs(det) < 1e-12)//like normalizationScale, we consider accuracy around 1e-6, i.e. 1e-12 when squared + continue; + const float detInvScaled = normalizationScale/det; + + const float t1 = (vx2*y21 - vy2*x21)*detInvScaled; + const float t2 = (vx1*y21 - vy1*x21)*detInvScaled; // This takes care of parallel lines if( cvIsInf(t1) || cvIsInf(t2) || cvIsNaN(t1) || cvIsNaN(t2) ) @@ -127,8 +147,8 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate if( t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f ) { - float xi = pts1[i].x + vec1[i].x*t1; - float yi = pts1[i].y + vec1[i].y*t1; + const float xi = pts1[i].x + vec1[i].x*t1; + const float yi = pts1[i].y + vec1[i].y*t1; intersection.push_back(Point2f(xi,yi)); } @@ -149,18 +169,20 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate int posSign = 0; int negSign = 0; - float x = pts1[i].x; - float y = pts1[i].y; + const float x = pts1[i].x; + const float y = pts1[i].y; for( int j = 0; j < 4; j++ ) { + float normalizationScale = vec2[j].x*vec2[j].x+vec2[j].y*vec2[j].y; + normalizationScale = (normalizationScale < 1e-12f) ? 1.f : 1.f/normalizationScale; // line equation: Ax + By + C = 0 // see which side of the line this point is at - float A = -vec2[j].y; - float B = vec2[j].x; - float C = -(A*pts2[j].x + B*pts2[j].y); + const float A = -vec2[j].y*normalizationScale ; + const float B = vec2[j].x*normalizationScale ; + const float C = -(A*pts2[j].x + B*pts2[j].y); - float s = A*x+ B*y+ C; + const float s = A*x + B*y + C; if( s >= 0 ) { @@ -187,18 +209,22 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate int posSign = 0; int negSign = 0; - float x = pts2[i].x; - float y = pts2[i].y; + const float x = pts2[i].x; + const float y = pts2[i].y; for( int j = 0; j < 4; j++ ) { // line equation: Ax + By + C = 0 // see which side of the line this point is at - float A = -vec1[j].y; - float B = vec1[j].x; - float C = -(A*pts1[j].x + B*pts1[j].y); + float normalizationScale = vec2[j].x*vec2[j].x+vec2[j].y*vec2[j].y; + normalizationScale = (normalizationScale < 1e-12f) ? 1.f : 1.f/normalizationScale; + if (std::isinf(normalizationScale )) + normalizationScale = 1.f; + const float A = -vec1[j].y*normalizationScale ; + const float B = vec1[j].x*normalizationScale ; + const float C = -(A*pts1[j].x + B*pts1[j].y); - float s = A*x + B*y + C; + const float s = A*x + B*y + C; if( s >= 0 ) { @@ -223,7 +249,7 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate } // Get rid of duplicated points - int Nstride = N; + const int Nstride = N; cv::AutoBuffer distPt(N * N); cv::AutoBuffer ptDistRemap(N); for (int i = 0; i < N; ++i) @@ -233,7 +259,7 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate for (int j = i + 1; j < N; ) { const Point2f pt1 = intersection[j]; - float d2 = normL2Sqr(pt1 - pt0); + const float d2 = normL2Sqr(pt1 - pt0); if(d2 <= samePointEps) { if (j < N - 1) @@ -252,10 +278,10 @@ static int _rotatedRectangleIntersection( const RotatedRect& rect1, const Rotate float minD = distPt[1]; for (int i = 0; i < N - 1; ++i) { - float* pDist = distPt.data() + Nstride * ptDistRemap[i]; + const float* pDist = distPt.data() + Nstride * ptDistRemap[i]; for (int j = i + 1; j < N; ++j) { - float d = pDist[ptDistRemap[j]]; + const float d = pDist[ptDistRemap[j]]; if (d < minD) { minD = d; diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index 69d07580f2..b001a37f89 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -566,7 +566,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4)) return false; - if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1 && iwSrc.m_size.height/(threads * 4) >= ksize.height/2) { bool ok; ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index 578b44716d..d27258fe0c 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -2355,5 +2355,16 @@ TEST(Imgproc, filter_empty_src_16857) EXPECT_TRUE(dst2.empty()); } +TEST(Imgproc_GaussianBlur, regression_11303) +{ + cv::Mat dst; + int width = 2115; + int height = 211; + double sigma = 8.64421; + cv::Mat src(cv::Size(width, height), CV_32F, 1); + cv::GaussianBlur(src, dst, cv::Size(), sigma, sigma); + EXPECT_LE(cv::norm(src, dst, NORM_L2), 1e-3); +} + }} // namespace diff --git a/modules/imgproc/test/test_intersection.cpp b/modules/imgproc/test/test_intersection.cpp index c455c439fc..9ba3bf8125 100644 --- a/modules/imgproc/test/test_intersection.cpp +++ b/modules/imgproc/test/test_intersection.cpp @@ -366,6 +366,84 @@ TEST(Imgproc_RotatedRectangleIntersection, regression_12221_2) EXPECT_LE(intersections.size(), (size_t)8); } +TEST(Imgproc_RotatedRectangleIntersection, accuracy_21659) +{ + float scaleFactor = 1000;//to challenge the normalizationScale in the algorithm + cv::RectanglesIntersectTypes intersectionResult = cv::RectanglesIntersectTypes::INTERSECT_NONE; + std::vector intersection; + double intersectionArea = 0; + cv::RotatedRect r1 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + cv::RotatedRect r2; + + r2 = cv::RotatedRect(cv::Point2f(-2.f, -2.f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_NONE, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.5f, .5f)*scaleFactor, cv::Size2f(1.f, 2.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.5f, 1.5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-0), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_FULL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r2.size.area()), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(.5f, .5f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_FULL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r2.size.area()), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(2.f, .5f)*scaleFactor, 0); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-500000), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(.5f, .5f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 45); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-828427), 1e-1); + + r2 = cv::RotatedRect(cv::Point2f(1.f, 1.f)*scaleFactor, cv::Size2f(1.f, 1.f)*scaleFactor, 45); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-250000), 1e-1); + + //see #21659 + r1 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545063f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + r2 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545235f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_EQ(cv::RectanglesIntersectTypes::INTERSECT_PARTIAL, intersectionResult); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-3); + + r1 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545063f + 0.01f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + r2 = cv::RotatedRect(cv::Point2f(4.48589373f, 12.5545235f), cv::Size2f(4.0f, 4.0f), 0.0347290039f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-1); + + r1 = cv::RotatedRect(cv::Point2f(45.0715866f, 39.8825722f), cv::Size2f(3.0f, 3.0f), 0.10067749f); + r2 = cv::RotatedRect(cv::Point2f(45.0715866f, 39.8825874f), cv::Size2f(3.0f, 3.0f), 0.10067749f); + intersectionResult = (cv::RectanglesIntersectTypes) cv::rotatedRectangleIntersection(r1, r2, intersection); + intersectionArea = (intersection.size() <= 2) ? 0. : cv::contourArea(intersection); + ASSERT_LE(std::abs(intersectionArea-r1.size.area()), 1e-3); +} + TEST(Imgproc_RotatedRectangleIntersection, regression_18520) { RotatedRect rr_empty( diff --git a/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java b/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java index 07c059b7d9..1993cf1407 100644 --- a/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java +++ b/modules/java/generator/android/java/org/opencv/android/CameraBridgeViewBase.java @@ -376,7 +376,7 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac if (!connectCamera(getWidth(), getHeight())) { AlertDialog ad = new AlertDialog.Builder(getContext()).create(); ad.setCancelable(false); // This blocks the 'BACK' button - ad.setMessage("It seems that you device does not support camera (or it is locked). Application will be closed."); + ad.setMessage("It seems that your device does not support camera (or it is locked). Application will be closed."); ad.setButton(DialogInterface.BUTTON_NEUTRAL, "OK", new DialogInterface.OnClickListener() { public void onClick(DialogInterface dialog, int which) { dialog.dismiss(); diff --git a/modules/objc/generator/gen_objc.py b/modules/objc/generator/gen_objc.py index 21df446a26..06d5341827 100755 --- a/modules/objc/generator/gen_objc.py +++ b/modules/objc/generator/gen_objc.py @@ -82,6 +82,9 @@ ManualFuncs = {} # { class : { func : { arg_name : {"ctype" : ctype, "attrib" : [attrib]} } } } func_arg_fix = {} +# { class : { func : { prolog : "", epilog : "" } } } +header_fix = {} + # { class : { enum: fixed_enum } } enum_fix = {} @@ -485,6 +488,9 @@ class FuncInfo(GeneralInfo): self.ctype = re.sub(r"^CvTermCriteria", "TermCriteria", decl[1] or "") self.args = [] func_fix_map = func_arg_fix.get(self.classname or module, {}).get(self.objc_name, {}) + header_fixes = header_fix.get(self.classname or module, {}).get(self.objc_name, {}) + self.prolog = header_fixes.get('prolog', None) + self.epilog = header_fixes.get('epilog', None) for a in decl[3]: arg = a[:] arg_fix_map = func_fix_map.get(arg[1], {}) @@ -1178,6 +1184,9 @@ class ObjectiveCWrapperGenerator(object): objc_name = fi.objc_name if not constructor else ("init" + ("With" + (args[0].name[0].upper() + args[0].name[1:]) if len(args) > 0 else "")) ) + if fi.prolog is not None: + method_declarations.write("\n%s\n\n" % fi.prolog) + method_declarations.write( Template( """$prototype$swift_name$deprecation_decl; @@ -1189,6 +1198,9 @@ class ObjectiveCWrapperGenerator(object): ) ) + if fi.epilog is not None: + method_declarations.write("%s\n\n" % fi.epilog) + method_implementations.write( Template( """$prototype {$prologue $ret_val$obj_deref$cv_name($cv_args)$tail;$epilogue$ret @@ -1654,6 +1666,7 @@ if __name__ == "__main__": AdditionalImports[module] = gen_type_dict.get("AdditionalImports", {}) ManualFuncs.update(gen_type_dict.get("ManualFuncs", {})) func_arg_fix.update(gen_type_dict.get("func_arg_fix", {})) + header_fix.update(gen_type_dict.get("header_fix", {})) enum_fix.update(gen_type_dict.get("enum_fix", {})) const_fix.update(gen_type_dict.get("const_fix", {})) namespaces_dict.update(gen_type_dict.get("namespaces_dict", {})) diff --git a/modules/photo/src/seamless_cloning.hpp b/modules/photo/src/seamless_cloning.hpp index 92b24e7b09..4d43970d2d 100644 --- a/modules/photo/src/seamless_cloning.hpp +++ b/modules/photo/src/seamless_cloning.hpp @@ -53,7 +53,7 @@ namespace cv class Cloning { public: - void normalClone(const cv::Mat& destination, const cv::Mat &mask, const cv::Mat &wmask, cv::Mat &cloned, int flag); + void normalClone(const cv::Mat& destination, const cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, int flag); void illuminationChange(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, float alpha, float beta); void localColorChange(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, cv::Mat &cloned, float red_mul, float green_mul, float blue_mul); void textureFlatten(cv::Mat &I, cv::Mat &mask, cv::Mat &wmask, float low_threshold, float high_threhold, int kernel_size, cv::Mat &cloned); @@ -61,10 +61,10 @@ namespace cv protected: void initVariables(const cv::Mat &destination, const cv::Mat &binaryMask); - void computeDerivatives(const cv::Mat &destination, const cv::Mat &patch, const cv::Mat &binaryMask); + void computeDerivatives(const cv::Mat &destination, const cv::Mat &patch, cv::Mat &binaryMask); void scalarProduct(cv::Mat mat, float r, float g, float b); void poisson(const cv::Mat &destination); - void evaluate(const cv::Mat &I, const cv::Mat &wmask, const cv::Mat &cloned); + void evaluate(const cv::Mat &I, cv::Mat &wmask, const cv::Mat &cloned); void dst(const Mat& src, Mat& dest, bool invert = false); void solve(const Mat &img, Mat& mod_diff, Mat &result); diff --git a/modules/photo/src/seamless_cloning_impl.cpp b/modules/photo/src/seamless_cloning_impl.cpp index 8fd4bc7865..4b3258a1d9 100644 --- a/modules/photo/src/seamless_cloning_impl.cpp +++ b/modules/photo/src/seamless_cloning_impl.cpp @@ -246,7 +246,7 @@ void Cloning::initVariables(const Mat &destination, const Mat &binaryMask) filter_Y[j] = 2.0f * (float)std::cos(scale * (j + 1)); } -void Cloning::computeDerivatives(const Mat& destination, const Mat &patch, const Mat &binaryMask) +void Cloning::computeDerivatives(const Mat& destination, const Mat &patch, Mat &binaryMask) { initVariables(destination, binaryMask); @@ -306,7 +306,7 @@ void Cloning::poisson(const Mat &destination) } } -void Cloning::evaluate(const Mat &I, const Mat &wmask, const Mat &cloned) +void Cloning::evaluate(const Mat &I, Mat &wmask, const Mat &cloned) { bitwise_not(wmask,wmask); @@ -320,7 +320,7 @@ void Cloning::evaluate(const Mat &I, const Mat &wmask, const Mat &cloned) merge(output,cloned); } -void Cloning::normalClone(const Mat &destination, const Mat &patch, const Mat &binaryMask, Mat &cloned, int flag) +void Cloning::normalClone(const Mat &destination, const Mat &patch, Mat &binaryMask, Mat &cloned, int flag) { const int w = destination.cols; const int h = destination.rows; diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 294905c783..5d952412f3 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -79,9 +79,9 @@ static int convert_to_char(PyObject *o, char *dst, const ArgInfo& info) #include "pyopencv_generated_enums.h" #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #else -#define CVPY_TYPE(WNAME, NAME, STORAGE, SNAME, _1, _2) CVPY_TYPE_DECLARE(WNAME, NAME, STORAGE, SNAME) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, _1, _2, SCOPE) CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) #endif #include "pyopencv_generated_types.h" #undef CVPY_TYPE @@ -281,6 +281,189 @@ static bool init_submodule(PyObject * root, const char * name, PyMethodDef * met return true; } +static inline +bool registerTypeInModuleScope(PyObject* module, const char* type_name, PyObject* type_obj) +{ + if (PyModule_AddObject(module, type_name, type_obj) < 0) + { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in module scope '%s'", + type_name, PyModule_GetName(module) + ); + Py_DECREF(type_obj); + return false; + } + return true; +} + +static inline +bool registerTypeInClassScope(PyObject* cls, const char* type_name, PyObject* type_obj) +{ + if (!PyType_CheckExact(cls)) { + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in class scope. " + "Scope class object has a wrong type", type_name + ); + return false; + } + if (PyObject_SetAttrString(cls, type_name, type_obj) < 0) + { + #ifndef Py_LIMITED_API + PyObject* cls_dict = reinterpret_cast(cls)->tp_dict; + if (PyDict_SetItemString(cls_dict, type_name, type_obj) >= 0) { + /// Clearing the error set by PyObject_SetAttrString: + /// TypeError: can't set attributes of built-in/extension type NAME + PyErr_Clear(); + return true; + } + #endif + const std::string cls_name = getPyObjectNameAttr(cls); + PyErr_Format(PyExc_ImportError, + "Failed to register type '%s' in '%s' class scope. Can't update scope dictionary", + type_name, cls_name.c_str() + ); + return false; + } + return true; +} + +static inline +PyObject* getScopeFromTypeObject(PyObject* obj, const std::string& scope_name) +{ + if (!PyType_CheckExact(obj)) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Scope class object has a wrong type", type_name.c_str() + ); + } + /// When using LIMITED API all classes are registered in the heap +#if defined(Py_LIMITED_API) + return PyObject_GetAttrString(obj, scope_name.c_str()); +#else + /// Otherwise classes may be registed on the stack or heap + PyObject* type_dict = reinterpret_cast(obj)->tp_dict; + if (!type_dict) { + const std::string type_name = getPyObjectNameAttr(obj); + return PyErr_Format(PyExc_ImportError, + "Failed to get scope from type '%s' " + "Type dictionary is not available", type_name.c_str() + ); + } + return PyDict_GetItemString(type_dict, scope_name.c_str()); +#endif // Py_LIMITED_API +} + +static inline +PyObject* findTypeScope(PyObject* root_module, const std::string& scope_name) +{ + PyObject* scope = root_module; + if (scope_name.empty()) + { + return scope; + } + /// Starting with 1 to omit leading dot in the scope name + size_t name_end = scope_name.find('.', 1); + if (name_end == std::string::npos) + { + name_end = scope_name.size(); + } + for (size_t name_start = 1; name_start < scope_name.size() && scope; ) + { + const std::string current_scope_name = scope_name.substr(name_start, + name_end - name_start); + + if (PyModule_CheckExact(scope)) + { + PyObject* scope_dict = PyModule_GetDict(scope); + if (!scope_dict) + { + return PyErr_Format(PyExc_ImportError, + "Scope '%s' dictionary is not available during the search for " + " the '%s' scope object", current_scope_name.c_str(), + scope_name.c_str() + ); + } + + scope = PyDict_GetItemString(scope_dict, current_scope_name.c_str()); + } + else if (PyType_CheckExact(scope)) + { + scope = getScopeFromTypeObject(scope, current_scope_name); + } + else + { + return PyErr_Format(PyExc_ImportError, + "Can't find scope '%s'. '%s' doesn't reference a module or a class", + scope_name.c_str(), current_scope_name.c_str() + ); + } + + + name_start = name_end + 1; + name_end = scope_name.find('.', name_start); + if (name_end == std::string::npos) + { + name_end = scope_name.size(); + } + } + if (!scope) + { + return PyErr_Format(PyExc_ImportError, + "Module or class with name '%s' can't be found in '%s' module", + scope_name.c_str(), PyModule_GetName(root_module) + ); + } + return scope; +} + +static bool registerNewType(PyObject* root_module, const char* type_name, + PyObject* type_obj, const std::string& scope_name) +{ + PyObject* scope = findTypeScope(root_module, scope_name); + + /// If scope can't be found it means that there is an error during + /// bindings generation + if (!scope) { + return false; + } + + if (PyModule_CheckExact(scope)) + { + if (!registerTypeInModuleScope(scope, type_name, type_obj)) + { + return false; + } + } + else + { + /// In Python 2 it is disallowed to register an inner classes + /// via modifing dictionary of the built-in type. + if (!registerTypeInClassScope(scope, type_name, type_obj)) + { + return false; + } + } + + /// Expose all classes that are defined in the submodules as aliases in the + /// root module for backward compatibility + /// If submodule and root module are same than no aliases registration are + /// required + if (scope != root_module) + { + std::string type_name_str(type_name); + + std::string alias_name; + alias_name.reserve(scope_name.size() + type_name_str.size()); + std::replace_copy(scope_name.begin() + 1, scope_name.end(), std::back_inserter(alias_name), '.', '_'); + alias_name += '_'; + alias_name += type_name_str; + + return registerTypeInModuleScope(root_module, alias_name.c_str(), type_obj); + } + return true; +} + #include "pyopencv_generated_modules_content.h" static bool init_body(PyObject * m) @@ -294,10 +477,10 @@ static bool init_body(PyObject * m) #undef CVPY_MODULE #ifdef CVPY_DYNAMIC_INIT -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyObject * pyopencv_NoBase_TypePtr = NULL; #else -#define CVPY_TYPE(WNAME, NAME, _1, _2, BASE, CONSTRUCTOR) CVPY_TYPE_INIT_STATIC(WNAME, NAME, return false, BASE, CONSTRUCTOR) +#define CVPY_TYPE(EXPORT_NAME, CLASS_ID, _1, _2, BASE, CONSTRUCTOR, SCOPE) CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, return false, BASE, CONSTRUCTOR, SCOPE) PyTypeObject * pyopencv_NoBase_TypePtr = NULL; #endif #include "pyopencv_generated_types.h" diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index babded785d..1e3ba1150d 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -246,10 +246,20 @@ class ClassProp(object): self.readonly = False class ClassInfo(object): - def __init__(self, name, decl=None): + def __init__(self, name, decl=None, codegen=None): + # Scope name can be a module or other class e.g. cv::SimpleBlobDetector::Params + scope_name, self.original_name = name.rsplit(".", 1) + + # In case scope refer the outer class exported with different name + if codegen: + scope_name = codegen.get_export_scope_name(scope_name) + self.scope_name = re.sub(r"^cv\.?", "", scope_name) + + self.export_name = self.original_name + + self.class_id = normalize_class_name(name) + self.cname = name.replace(".", "::") - self.name = self.wname = normalize_class_name(name) - self.sname = name[name.rfind('.') + 1:] self.ismap = False self.issimple = False self.isalgorithm = False @@ -260,12 +270,11 @@ class ClassInfo(object): self.consts = {} self.base = None self.constructor = None - customname = False if decl: bases = decl[1].split()[1:] if len(bases) > 1: - print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,)) + print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.cname,)) print(" Bases: ", " ".join(bases)) print(" Only the first base class will be used") #return sys.exit(-1) @@ -279,13 +288,8 @@ class ClassInfo(object): for m in decl[2]: if m.startswith("="): - wname = m[1:] - npos = name.rfind('.') - if npos >= 0: - self.wname = normalize_class_name(name[:npos] + '.' + wname) - else: - self.wname = wname - customname = True + # Aliasing only affects the exported class name, not class identifier + self.export_name = m[1:] elif m == "/Map": self.ismap = True elif m == "/Simple": @@ -294,8 +298,35 @@ class ClassInfo(object): self.isparams = True self.props = [ClassProp(p) for p in decl[3]] - if not customname and self.wname.startswith("Cv"): - self.wname = self.wname[2:] + if not self.has_export_alias and self.original_name.startswith("Cv"): + self.export_name = self.export_name[2:] + + @property + def wname(self): + if len(self.scope_name) > 0: + return self.scope_name.replace(".", "_") + "_" + self.export_name + + return self.export_name + + @property + def name(self): + return self.class_id + + @property + def full_scope_name(self): + return "cv." + self.scope_name if len(self.scope_name) else "cv" + + @property + def full_export_name(self): + return self.full_scope_name + "." + self.export_name + + @property + def full_original_name(self): + return self.full_scope_name + "." + self.original_name + + @property + def has_export_alias(self): + return self.export_name != self.original_name def gen_map_code(self, codegen): all_classes = codegen.classes @@ -349,9 +380,11 @@ class ClassInfo(object): methods_code.write(m.gen_code(codegen)) methods_inits.write(m.get_tab_entry()) - code = gen_template_type_impl.substitute(name=self.name, wname=self.wname, cname=self.cname, - getset_code=getset_code.getvalue(), getset_inits=getset_inits.getvalue(), - methods_code=methods_code.getvalue(), methods_inits=methods_inits.getvalue()) + code = gen_template_type_impl.substitute(name=self.name, + getset_code=getset_code.getvalue(), + getset_inits=getset_inits.getvalue(), + methods_code=methods_code.getvalue(), + methods_inits=methods_inits.getvalue()) return code @@ -365,13 +398,15 @@ class ClassInfo(object): if self.constructor is not None: constructor_name = self.constructor.get_wrapper_name() - return "CVPY_TYPE({}, {}, {}, {}, {}, {});\n".format( - self.wname, - self.name, + return 'CVPY_TYPE({}, {}, {}, {}, {}, {}, "{}");\n'.format( + self.export_name, + self.class_id, self.cname if self.issimple else "Ptr<{}>".format(self.cname), - self.sname if self.issimple else "Ptr", + self.original_name if self.issimple else "Ptr", baseptr, - constructor_name + constructor_name, + # Leading dot is required to provide correct class naming + "." + self.scope_name if len(self.scope_name) > 0 else self.scope_name ) @@ -871,12 +906,12 @@ class FuncInfo(object): classinfo = all_classes[self.classname] #if dump: pprint(vars(classinfo)) if self.isconstructor: - py_name = 'cv.' + classinfo.wname - elif self.is_static: - py_name = '.'.join([self.namespace, classinfo.sname + '_' + self.variants[0].wname]) + py_name = classinfo.full_export_name else: + py_name = classinfo.full_export_name + "." + self.variants[0].wname + + if not self.is_static: cname = classinfo.cname + '::' + cname - py_name = 'cv.' + classinfo.wname + '.' + self.variants[0].wname else: py_name = '.'.join([self.namespace, self.variants[0].wname]) #if dump: print(cname + " => " + py_name) @@ -918,7 +953,7 @@ class PythonWrapperGenerator(object): self.class_idx = 0 def add_class(self, stype, name, decl): - classinfo = ClassInfo(name, decl) + classinfo = ClassInfo(name, decl, self) classinfo.decl_idx = self.class_idx self.class_idx += 1 @@ -928,16 +963,30 @@ class PythonWrapperGenerator(object): sys.exit(-1) self.classes[classinfo.name] = classinfo - # Add Class to json file. - namespace, classes, name = self.split_decl_name(name) + namespace, _, _ = self.split_decl_name(name) namespace = '.'.join(namespace) - name = '_'.join(classes+[name]) + # Registering a namespace if it is not already handled or + # doesn't have anything except classes defined in it + self.namespaces.setdefault(namespace, Namespace()) - py_name = 'cv.' + classinfo.wname # use wrapper name + # Add Class to json file. + py_name = classinfo.full_export_name # use wrapper name py_signatures = self.py_signatures.setdefault(classinfo.cname, []) py_signatures.append(dict(name=py_name)) #print('class: ' + classinfo.cname + " => " + py_name) + def get_export_scope_name(self, original_scope_name): + # Outer classes should be registered before their content - inner classes in this case + class_scope = self.classes.get(normalize_class_name(original_scope_name), None) + + if class_scope: + return class_scope.full_export_name + + # Otherwise it is a namespace. + # If something is messed up at this point - it will be revelead during + # library import + return original_scope_name + def split_decl_name(self, name): chunks = name.split('.') namespace = chunks[:-1] @@ -1027,6 +1076,7 @@ class PythonWrapperGenerator(object): w_classes.append(w_classname) g_wname = "_".join(w_classes+[name]) func_map = self.namespaces.setdefault(namespace_str, Namespace()).funcs + # Exports static function with internal name (backward compatibility) func = func_map.setdefault(g_name, FuncInfo("", g_name, cname, isconstructor, namespace_str, False)) func.add_variant(self.classes, decl, isphantom) if g_wname != g_name: # TODO OpenCV 5.0 @@ -1190,10 +1240,25 @@ class PythonWrapperGenerator(object): classlist1 = [(classinfo.decl_idx, name, classinfo) for name, classinfo in classlist] classlist1.sort() + published_types = set() # ensure toposort with base classes for decl_idx, name, classinfo in classlist1: if classinfo.ismap: continue - self.code_type_publish.write(classinfo.gen_def(self)) + def _registerType(classinfo): + if classinfo.decl_idx in published_types: + #print(classinfo.decl_idx, classinfo.name, ' - already published') + return + published_types.add(classinfo.decl_idx) + + if classinfo.base and classinfo.base in self.classes: + base_classinfo = self.classes[classinfo.base] + #print(classinfo.decl_idx, classinfo.name, ' - request publishing of base type ', base_classinfo.decl_idx, base_classinfo.name) + _registerType(base_classinfo) + + #print(classinfo.decl_idx, classinfo.name, ' - published!') + self.code_type_publish.write(classinfo.gen_def(self)) + + _registerType(classinfo) # step 3: generate the code for all the global functions diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index d304e6cfcf..a31713f353 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -1013,7 +1013,7 @@ class CppHeaderParser(object): docstring = "" if stmt_type == "namespace": chunks = [block[1] for block in self.block_stack if block[0] == 'namespace'] + [name] - self.namespaces.add('.'.join(chunks)) + self.namespaces.add('.'.join(filter(lambda c: len(c)> 0, chunks))) else: stmt_type, name, parse_flag = "block", "", False diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp index 8b44726d5f..18336d4295 100644 --- a/modules/python/src2/pycompat.hpp +++ b/modules/python/src2/pycompat.hpp @@ -62,6 +62,10 @@ #endif // PY_MAJOR >=3 +#ifndef PyType_CheckExact +#define PyType_CheckExact(obj) (Py_TYPE(op) == &PyType_Type) +#endif // !PyType_CheckExact + static inline bool getUnicodeString(PyObject * obj, std::string &str) { bool res = false; @@ -93,6 +97,26 @@ static inline bool getUnicodeString(PyObject * obj, std::string &str) return res; } +static inline +std::string getPyObjectNameAttr(PyObject* obj) +{ + std::string obj_name; + PyObject* cls_name_obj = PyObject_GetAttrString(obj, "__name__"); + if (cls_name_obj && !getUnicodeString(cls_name_obj, obj_name)) { + obj_name.clear(); + } + #ifndef Py_LIMITED_API + if (PyType_CheckExact(obj) && obj_name.empty()) + { + obj_name = reinterpret_cast(obj)->tp_name; + } + #endif + if (obj_name.empty()) { + obj_name = ""; + } + return obj_name; +} + //================================================================================================== #define CV_PY_FN_WITH_KW_(fn, flags) (PyCFunction)(void*)(PyCFunctionWithKeywords)(fn), (flags) | METH_VARARGS | METH_KEYWORDS @@ -174,107 +198,106 @@ PyObject* pyopencv_from(const TYPE& src) #endif -#define CVPY_TYPE_DECLARE(WNAME, NAME, STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyTypeObject pyopencv_##NAME##_TypeXXX = \ + static PyTypeObject pyopencv_##CLASS_ID##_TypeXXX = \ { \ CVPY_TYPE_HEAD \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ }; \ - static PyTypeObject * pyopencv_##NAME##_TypePtr = &pyopencv_##NAME##_TypeXXX; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyTypeObject * pyopencv_##CLASS_ID##_TypePtr = &pyopencv_##CLASS_ID##_TypeXXX; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_NEW(pyopencv_##NAME##_t, pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_NEW(pyopencv_##CLASS_ID##_t, pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ return (PyObject*)m; \ } \ - static void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" %p>", self); \ return PyString_FromString(str); \ } -#define CVPY_TYPE_INIT_STATIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_STATIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ - pyopencv_##NAME##_TypePtr->tp_dealloc = pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_TypePtr->tp_repr = pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_TypePtr->tp_getset = pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ - pyopencv_##NAME##_TypePtr->tp_methods = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_TypePtr->tp_alloc = PyType_GenericAlloc; \ - pyopencv_##NAME##_TypePtr->tp_new = PyType_GenericNew; \ - pyopencv_##NAME##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ - if (PyType_Ready(pyopencv_##NAME##_TypePtr) != 0) \ + pyopencv_##CLASS_ID##_TypePtr->tp_base = pyopencv_##BASE##_TypePtr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_dealloc = pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_repr = pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_TypePtr->tp_getset = pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_TypePtr->tp_init = (initproc) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_TypePtr->tp_methods = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_TypePtr->tp_alloc = PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_TypePtr->tp_new = PyType_GenericNew; \ + pyopencv_##CLASS_ID##_TypePtr->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE; \ + if (PyType_Ready(pyopencv_##CLASS_ID##_TypePtr) != 0) \ { \ ERROR_HANDLER; \ } \ - CVPY_TYPE_INCREF(pyopencv_##NAME##_TypePtr); \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + CVPY_TYPE_INCREF(pyopencv_##CLASS_ID##_TypePtr); \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } //================================================================================================== -#define CVPY_TYPE_DECLARE_DYNAMIC(WNAME, NAME, STORAGE, SNAME) \ - struct pyopencv_##NAME##_t \ +#define CVPY_TYPE_DECLARE_DYNAMIC(EXPORT_NAME, CLASS_ID, STORAGE, SNAME, SCOPE) \ + struct pyopencv_##CLASS_ID##_t \ { \ PyObject_HEAD \ STORAGE v; \ }; \ - static PyObject * pyopencv_##NAME##_TypePtr = 0; \ - static bool pyopencv_##NAME##_getp(PyObject * self, STORAGE * & dst) \ + static PyObject * pyopencv_##CLASS_ID##_TypePtr = 0; \ + static bool pyopencv_##CLASS_ID##_getp(PyObject * self, STORAGE * & dst) \ { \ - if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##NAME##_TypePtr)) \ + if (PyObject_TypeCheck(self, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr)) \ { \ - dst = &(((pyopencv_##NAME##_t*)self)->v); \ + dst = &(((pyopencv_##CLASS_ID##_t*)self)->v); \ return true; \ } \ return false; \ } \ - static PyObject * pyopencv_##NAME##_Instance(const STORAGE &r) \ + static PyObject * pyopencv_##CLASS_ID##_Instance(const STORAGE &r) \ { \ - pyopencv_##NAME##_t *m = PyObject_New(pyopencv_##NAME##_t, (PyTypeObject*)pyopencv_##NAME##_TypePtr); \ + pyopencv_##CLASS_ID##_t *m = PyObject_New(pyopencv_##CLASS_ID##_t, (PyTypeObject*)pyopencv_##CLASS_ID##_TypePtr); \ new (&(m->v)) STORAGE(r); \ return (PyObject*)m; \ } \ - static void pyopencv_##NAME##_dealloc(PyObject* self) \ + static void pyopencv_##CLASS_ID##_dealloc(PyObject* self) \ { \ - ((pyopencv_##NAME##_t*)self)->v.STORAGE::~SNAME(); \ + ((pyopencv_##CLASS_ID##_t*)self)->v.STORAGE::~SNAME(); \ PyObject_Del(self); \ } \ - static PyObject* pyopencv_##NAME##_repr(PyObject* self) \ + static PyObject* pyopencv_##CLASS_ID##_repr(PyObject* self) \ { \ char str[1000]; \ - sprintf(str, "<"#WNAME" %p>", self); \ + sprintf(str, "< " MODULESTR SCOPE"."#EXPORT_NAME" %p>", self); \ return PyString_FromString(str); \ } \ - static PyType_Slot pyopencv_##NAME##_Slots[] = \ + static PyType_Slot pyopencv_##CLASS_ID##_Slots[] = \ { \ {Py_tp_dealloc, 0}, \ {Py_tp_repr, 0}, \ @@ -285,37 +308,36 @@ PyObject* pyopencv_from(const TYPE& src) {Py_tp_new, 0}, \ {0, 0} \ }; \ - static PyType_Spec pyopencv_##NAME##_Spec = \ + static PyType_Spec pyopencv_##CLASS_ID##_Spec = \ { \ - MODULESTR"."#WNAME, \ - sizeof(pyopencv_##NAME##_t), \ + MODULESTR SCOPE"."#EXPORT_NAME, \ + sizeof(pyopencv_##CLASS_ID##_t), \ 0, \ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, \ - pyopencv_##NAME##_Slots \ + pyopencv_##CLASS_ID##_Slots \ }; -#define CVPY_TYPE_INIT_DYNAMIC(WNAME, NAME, ERROR_HANDLER, BASE, CONSTRUCTOR) \ +#define CVPY_TYPE_INIT_DYNAMIC(EXPORT_NAME, CLASS_ID, ERROR_HANDLER, BASE, CONSTRUCTOR, SCOPE) \ { \ - pyopencv_##NAME##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##NAME##_dealloc; \ - pyopencv_##NAME##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##NAME##_repr; \ - pyopencv_##NAME##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##NAME##_getseters; \ - pyopencv_##NAME##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ - pyopencv_##NAME##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##NAME##_methods; \ - pyopencv_##NAME##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ - pyopencv_##NAME##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \ + pyopencv_##CLASS_ID##_Slots[0].pfunc /*tp_dealloc*/ = (void*)pyopencv_##CLASS_ID##_dealloc; \ + pyopencv_##CLASS_ID##_Slots[1].pfunc /*tp_repr*/ = (void*)pyopencv_##CLASS_ID##_repr; \ + pyopencv_##CLASS_ID##_Slots[2].pfunc /*tp_getset*/ = (void*)pyopencv_##CLASS_ID##_getseters; \ + pyopencv_##CLASS_ID##_Slots[3].pfunc /*tp_init*/ = (void*) CONSTRUCTOR; \ + pyopencv_##CLASS_ID##_Slots[4].pfunc /*tp_methods*/ = pyopencv_##CLASS_ID##_methods; \ + pyopencv_##CLASS_ID##_Slots[5].pfunc /*tp_alloc*/ = (void*)PyType_GenericAlloc; \ + pyopencv_##CLASS_ID##_Slots[6].pfunc /*tp_new*/ = (void*)PyType_GenericNew; \ PyObject * bases = 0; \ if (pyopencv_##BASE##_TypePtr) \ bases = PyTuple_Pack(1, pyopencv_##BASE##_TypePtr); \ - pyopencv_##NAME##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##NAME##_Spec, bases); \ - if (!pyopencv_##NAME##_TypePtr) \ + pyopencv_##CLASS_ID##_TypePtr = PyType_FromSpecWithBases(&pyopencv_##CLASS_ID##_Spec, bases); \ + if (!pyopencv_##CLASS_ID##_TypePtr) \ { \ - printf("Failed to create type from spec: " #WNAME ", base (" #BASE ")\n"); \ + printf("Failed to create type from spec: " #CLASS_ID ", base (" #BASE ")\n"); \ ERROR_HANDLER; \ } \ - if (PyModule_AddObject(m, #WNAME, (PyObject *)pyopencv_##NAME##_TypePtr) < 0) \ + if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ - printf("Failed to register a new type: " #WNAME ", base (" #BASE ")\n"); \ - Py_DECREF(pyopencv_##NAME##_TypePtr); \ + printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ ERROR_HANDLER; \ } \ } diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 48657d595c..fd21656d83 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -633,6 +633,84 @@ class Arguments(NewOpenCVTests): self.assertEqual(flag, cv.utils.nested.testEchoBooleanFunction(flag), msg="Function in nested module returns wrong result") + def test_class_from_submodule_has_global_alias(self): + self.assertTrue(hasattr(cv.ml, "Boost"), + msg="Class is not registered in the submodule") + self.assertTrue(hasattr(cv, "ml_Boost"), + msg="Class from submodule doesn't have alias in the " + "global module") + self.assertEqual(cv.ml.Boost, cv.ml_Boost, + msg="Classes from submodules and global module don't refer " + "to the same type") + + def test_class_from_submodule_has_global_alias(self): + self.assertTrue(hasattr(cv.ml, "Boost"), + msg="Class is not registered in the submodule") + self.assertTrue(hasattr(cv, "ml_Boost"), + msg="Class from submodule doesn't have alias in the " + "global module") + self.assertEqual(cv.ml.Boost, cv.ml_Boost, + msg="Classes from submodules and global module don't refer " + "to the same type") + + def test_inner_class_has_global_alias(self): + self.assertTrue(hasattr(cv.SimpleBlobDetector, "Params"), + msg="Class is not registered as inner class") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + self.assertEqual(cv.SimpleBlobDetector.Params, cv.SimpleBlobDetector_Params, + msg="Inner class and class in global module don't refer " + "to the same type") + self.assertTrue(hasattr(cv, "SimpleBlobDetector_Params"), + msg="Inner class doesn't have alias in the global module") + + def test_export_class_with_different_name(self): + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName"), + msg="Class with export alias is not registered in the submodule") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName"), + msg="Class with export alias doesn't have alias in the " + "global module") + self.assertEqual(cv.utils.nested.ExportClassName.originalName(), "OriginalClassName") + + instance = cv.utils.nested.ExportClassName.create() + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertTrue(hasattr(cv.utils.nested, "ExportClassName_create"), + msg="Factory function should have alias in the same module as the class") + # self.assertFalse(hasattr(cv.utils.nested, "OriginalClassName_create"), + # msg="Factory function should not be registered with original class name, "\ + # "when class has different export name") + + def test_export_inner_class_of_class_exported_with_different_name(self): + if not hasattr(cv.utils.nested, "ExportClassName"): + raise unittest.SkipTest("Outer class with export alias is not registered in the submodule") + + self.assertTrue(hasattr(cv.utils.nested.ExportClassName, "Params"), + msg="Inner class with export alias is not registered in " + "the outer class") + self.assertTrue(hasattr(cv, "utils_nested_ExportClassName_Params"), + msg="Inner class with export alias is not registered in " + "global module") + params = cv.utils.nested.ExportClassName.Params() + params.int_value = 45 + params.float_value = 4.5 + + instance = cv.utils.nested.ExportClassName.create(params) + self.assertTrue(isinstance(instance, cv.utils.nested.ExportClassName), + msg="Factory function returns wrong class instance: {}".format(type(instance))) + self.assertEqual( + params.int_value, instance.getIntParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. Actual: {}".format( + params.int_value, instance.getIntParam() + )) + self.assertEqual( + params.float_value, instance.getFloatParam(), + msg="Class initialized with wrong integer parameter. Expected: {}. Actual: {}".format( + params.float_value, instance.getFloatParam() + )) + + + class CanUsePurePythonModuleFunction(NewOpenCVTests): def test_can_get_ocv_version(self): diff --git a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp index cd9749ca8b..1b7d7d6897 100644 --- a/modules/stitching/include/opencv2/stitching/detail/matchers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/matchers.hpp @@ -104,8 +104,8 @@ struct CV_EXPORTS_W_SIMPLE MatchesInfo CV_PROP_RW int src_img_idx; CV_PROP_RW int dst_img_idx; //!< Images indices (optional) - std::vector matches; - std::vector inliers_mask; //!< Geometrically consistent matches mask + CV_PROP_RW std::vector matches; + CV_PROP_RW std::vector inliers_mask; //!< Geometrically consistent matches mask CV_PROP_RW int num_inliers; //!< Number of geometrically consistent matches CV_PROP_RW Mat H; //!< Estimated transformation CV_PROP_RW double confidence; //!< Confidence two images are from the same panorama diff --git a/modules/stitching/misc/python/test/test_stitching.py b/modules/stitching/misc/python/test/test_stitching.py index 0d66182fb8..2e7b2b5818 100644 --- a/modules/stitching/misc/python/test/test_stitching.py +++ b/modules/stitching/misc/python/test/test_stitching.py @@ -118,5 +118,22 @@ class stitching_compose_panorama_args(NewOpenCVTests): assert result == 0 +class stitching_matches_info_test(NewOpenCVTests): + + def test_simple(self): + finder = cv.ORB.create() + img1 = self.get_sample('stitching/a1.png') + img2 = self.get_sample('stitching/a2.png') + + img_feat1 = cv.detail.computeImageFeatures2(finder, img1) + img_feat2 = cv.detail.computeImageFeatures2(finder, img2) + + matcher = cv.detail.BestOf2NearestMatcher_create() + matches_info = matcher.apply(img_feat1, img_feat2) + + self.assertIsNotNone(matches_info.matches) + self.assertIsNotNone(matches_info.inliers_mask) + + if __name__ == '__main__': NewOpenCVTests.bootstrap() diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 11572e9f48..717eb7b14c 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -89,7 +89,7 @@ extern int test_loop_times; #define EXPECT_MAT_NORM(mat, eps) \ do \ { \ - EXPECT_LE(TestUtils::checkNorm1(mat), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm1(mat), eps) \ } while ((void)0, 0) #undef EXPECT_MAT_NEAR @@ -98,7 +98,7 @@ do \ { \ ASSERT_EQ(mat1.type(), mat2.type()); \ ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(TestUtils::checkNorm2(mat1, mat2), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(mat1, mat2), eps) \ << "Size: " << mat1.size() << std::endl; \ } while ((void)0, 0) @@ -107,7 +107,7 @@ do \ { \ ASSERT_EQ((mat1).type(), (mat2).type()); \ ASSERT_EQ((mat1).size(), (mat2).size()); \ - EXPECT_LE(TestUtils::checkNormRelative((mat1), (mat2)), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelative((mat1), (mat2)), eps) \ << "Size: " << (mat1).size() << std::endl; \ } while ((void)0, 0) @@ -146,7 +146,7 @@ do \ { \ ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \ ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \ - EXPECT_LE(TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name ## _roi, u ## name ## _roi), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ Point _offset; \ Size _wholeSize; \ @@ -155,7 +155,7 @@ do \ _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \ ASSERT_EQ(name.type(), u ## name.type()); \ ASSERT_EQ(name.size(), u ## name.size()); \ - EXPECT_LE(TestUtils::checkNorm2(name, u ## name, _mask), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNorm2(name, u ## name, _mask), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ } while ((void)0, 0) @@ -183,7 +183,7 @@ do \ { \ ASSERT_EQ(name ## _roi.type(), u ## name ## _roi.type()); \ ASSERT_EQ(name ## _roi.size(), u ## name ## _roi.size()); \ - EXPECT_LE(TestUtils::checkNormRelativeSparse(name ## _roi, u ## name ## _roi), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name ## _roi, u ## name ## _roi), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ Point _offset; \ Size _wholeSize; \ @@ -192,7 +192,7 @@ do \ _mask(Rect(_offset, name ## _roi.size())).setTo(Scalar::all(0)); \ ASSERT_EQ(name.type(), u ## name.type()); \ ASSERT_EQ(name.size(), u ## name.size()); \ - EXPECT_LE(TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \ + EXPECT_LE(cvtest::ocl::TestUtils::checkNormRelativeSparse(name, u ## name, _mask), eps) \ << "Size: " << name ## _roi.size() << std::endl; \ } while ((void)0, 0) diff --git a/modules/video/src/bgfg_KNN.cpp b/modules/video/src/bgfg_KNN.cpp index 1ddf1b7d51..5ec2266921 100644 --- a/modules/video/src/bgfg_KNN.cpp +++ b/modules/video/src/bgfg_KNN.cpp @@ -214,6 +214,8 @@ public: } } + virtual String getDefaultName() const CV_OVERRIDE { return "BackgroundSubtractor_KNN"; } + virtual int getHistory() const CV_OVERRIDE { return history; } virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; } diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index 69e4baf657..f7b26ef06b 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -236,6 +236,8 @@ public: } } + virtual String getDefaultName() const CV_OVERRIDE { return "BackgroundSubtractor_MOG2"; } + virtual int getHistory() const CV_OVERRIDE { return history; } virtual void setHistory(int _nframes) CV_OVERRIDE { history = _nframes; } diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index 93ea8cdddc..35491a0b5e 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -751,7 +751,7 @@ public: */ CV_WRAP virtual bool open(const String& filename, int apiPreference = CAP_ANY); - /** @brief Opens a camera for video capturing + /** @brief Opens a video file or a capturing device or an IP video stream for video capturing with API Preference and parameters @overload @@ -775,7 +775,7 @@ public: */ CV_WRAP virtual bool open(int index, int apiPreference = CAP_ANY); - /** @brief Returns true if video capturing has been initialized already. + /** @brief Opens a camera for video capturing with API Preference and parameters @overload diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 47dc00ab04..b7fa0b745c 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -41,6 +41,8 @@ //M*/ #include "cap_ffmpeg_legacy_api.hpp" +#include "opencv2/core/utils/logger.hpp" +#include "cap_interface.hpp" using namespace cv; @@ -49,6 +51,7 @@ using namespace cv; #endif #include #include +#include #ifndef __OPENCV_BUILD #define CV_FOURCC(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24)) @@ -79,6 +82,7 @@ extern "C" { #include #include +#include #if LIBAVUTIL_BUILD >= (LIBAVUTIL_VERSION_MICRO >= 100 \ ? CALC_FFMPEG_VERSION(51, 63, 100) : CALC_FFMPEG_VERSION(54, 6, 0)) @@ -88,6 +92,62 @@ extern "C" { #include #include +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L602-L605 +#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(58, 9, 100) +# define CV_FFMPEG_REGISTER +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L654-L657 +#if LIBAVCODEC_BUILD < CALC_FFMPEG_VERSION(58, 9, 100) +# define CV_FFMPEG_LOCKMGR +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L390-L392 +#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(58, 87, 100) +#include +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L208-L210 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 0, 100) +# define CV_FFMPEG_FMT_CONST const +#else +# define CV_FFMPEG_FMT_CONST +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L623-L624 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 7, 100) +# define CV_FFMPEG_URL +#endif + +// AVStream.codec deprecated in favor of AVStream.codecpar +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L1039-L1040 +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 16, 100) +//#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(57, 33, 100) +# define CV_FFMPEG_CODECPAR +# define CV_FFMPEG_CODEC_FIELD codecpar +#else +# define CV_FFMPEG_CODEC_FIELD codec +#endif + +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(59, 16, 100) +# define CV_FFMPEG_PTS_FIELD pts +#else +# define CV_FFMPEG_PTS_FIELD pkt_pts +#endif + +// https://github.com/FFmpeg/FFmpeg/blob/b6af56c034759b81985f8ea094e41cbd5f7fecfb/doc/APIchanges#L1757-L1758 +#if LIBAVUTIL_BUILD < CALC_FFMPEG_VERSION(52, 63, 100) +inline static AVRational av_make_q(int num, int den) +{ + AVRational res; + res.num = num; + res.den = den; + return res; +} +#endif + + + #ifdef __cplusplus } #endif @@ -471,6 +531,15 @@ static AVRational _opencv_ffmpeg_get_sample_aspect_ratio(AVStream *stream) #endif } +inline static std::string _opencv_ffmpeg_get_error_string(int error_code) +{ + char buf[255] = {0}; + const int err = av_strerror(error_code, buf, 254); + if (err == 0) + return std::string(buf); + else + return std::string("Unknown error"); +} struct CvCapture_FFMPEG { @@ -502,6 +571,7 @@ struct CvCapture_FFMPEG AVFormatContext * ic; AVCodec * avcodec; + AVCodecContext * context; int video_stream; AVStream * video_st; AVFrame * picture; @@ -565,6 +635,7 @@ void CvCapture_FFMPEG::init() img_convert_ctx = 0; avcodec = 0; + context = 0; frame_number = 0; eps_zero = 0.000025; @@ -617,10 +688,19 @@ void CvCapture_FFMPEG::close() if( video_st ) { - avcodec_close( video_st->codec ); +#ifdef CV_FFMPEG_CODECPAR + avcodec_close( context ); +#endif video_st = NULL; } + if (context) + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#endif + } + if( ic ) { avformat_close_input(&ic); @@ -798,8 +878,10 @@ private: }; #endif + static ImplMutex _mutex; +#ifdef CV_FFMPEG_LOCKMGR static int LockCallBack(void **mutex, AVLockOp op) { ImplMutex* localMutex = reinterpret_cast(*mutex); @@ -830,7 +912,7 @@ static int LockCallBack(void **mutex, AVLockOp op) } return 0; } - +#endif static void ffmpeg_log_callback(void *ptr, int level, const char *fmt, va_list vargs) { @@ -881,19 +963,59 @@ public: { avformat_network_init(); +#ifdef CV_FFMPEG_REGISTER /* register all codecs, demux and protocols */ av_register_all(); +#endif +#ifdef CV_FFMPEG_LOCKMGR /* register a callback function for synchronization */ av_lockmgr_register(&LockCallBack); +#endif } ~InternalFFMpegRegister() { +#ifdef CV_FFMPEG_LOCKMGR av_lockmgr_register(NULL); +#endif av_log_set_callback(NULL); } }; +inline void fill_codec_context(AVCodecContext * enc, AVDictionary * dict) +{ +//#ifdef FF_API_THREAD_INIT +// avcodec_thread_init(enc, get_number_of_cpus()); +//#else + enc->thread_count = get_number_of_cpus(); +//#endif + + AVDictionaryEntry* avdiscard_entry = av_dict_get(dict, "avdiscard", NULL, 0); + + if (avdiscard_entry) + { + if(strcmp(avdiscard_entry->value, "all") == 0) + enc->skip_frame = AVDISCARD_ALL; + else if (strcmp(avdiscard_entry->value, "bidir") == 0) + enc->skip_frame = AVDISCARD_BIDIR; + else if (strcmp(avdiscard_entry->value, "default") == 0) + enc->skip_frame = AVDISCARD_DEFAULT; + else if (strcmp(avdiscard_entry->value, "none") == 0) + enc->skip_frame = AVDISCARD_NONE; + // NONINTRA flag was introduced with version bump at revision: + // https://github.com/FFmpeg/FFmpeg/commit/b152152df3b778d0a86dcda5d4f5d065b4175a7b + // This key is supported only for FFMPEG version +#if LIBAVCODEC_VERSION_MICRO >= 100 && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 67, 100) + else if (strcmp(avdiscard_entry->value, "nonintra") == 0) + enc->skip_frame = AVDISCARD_NONINTRA; +#endif + else if (strcmp(avdiscard_entry->value, "nonkey") == 0) + enc->skip_frame = AVDISCARD_NONKEY; + else if (strcmp(avdiscard_entry->value, "nonref") == 0) + enc->skip_frame = AVDISCARD_NONREF; + } +} + bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& params) { InternalFFMpegRegister::init(); @@ -997,7 +1119,7 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& #else av_dict_set(&dict, "rtsp_transport", "tcp", 0); #endif - AVInputFormat* input_format = NULL; + CV_FFMPEG_FMT_CONST AVInputFormat* input_format = NULL; AVDictionaryEntry* entry = av_dict_get(dict, "input_format", NULL, 0); if (entry != 0) { @@ -1015,60 +1137,44 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& err = avformat_find_stream_info(ic, NULL); if (err < 0) { - CV_WARN("Could not find codec parameters"); + CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } for(i = 0; i < ic->nb_streams; i++) { - AVCodecContext* enc = ic->streams[i]->codec; - -//#ifdef FF_API_THREAD_INIT -// avcodec_thread_init(enc, get_number_of_cpus()); -//#else - enc->thread_count = get_number_of_cpus(); -//#endif - - AVDictionaryEntry* avdiscard_entry = av_dict_get(dict, "avdiscard", NULL, 0); - - if (avdiscard_entry) { - if(strcmp(avdiscard_entry->value, "all") == 0) - enc->skip_frame = AVDISCARD_ALL; - else if (strcmp(avdiscard_entry->value, "bidir") == 0) - enc->skip_frame = AVDISCARD_BIDIR; - else if (strcmp(avdiscard_entry->value, "default") == 0) - enc->skip_frame = AVDISCARD_DEFAULT; - else if (strcmp(avdiscard_entry->value, "none") == 0) - enc->skip_frame = AVDISCARD_NONE; - // NONINTRA flag was introduced with version bump at revision: - // https://github.com/FFmpeg/FFmpeg/commit/b152152df3b778d0a86dcda5d4f5d065b4175a7b - // This key is supported only for FFMPEG version -#if LIBAVCODEC_VERSION_MICRO >= 100 && LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(55, 67, 100) - else if (strcmp(avdiscard_entry->value, "nonintra") == 0) - enc->skip_frame = AVDISCARD_NONINTRA; +#ifndef CV_FFMPEG_CODECPAR + context = ic->streams[i]->codec; + AVCodecID codec_id = context->codec_id; + AVMediaType codec_type = context->codec_type; +#else + AVCodecParameters* par = ic->streams[i]->codecpar; + AVCodecID codec_id = par->codec_id; + AVMediaType codec_type = par->codec_type; #endif - else if (strcmp(avdiscard_entry->value, "nonkey") == 0) - enc->skip_frame = AVDISCARD_NONKEY; - else if (strcmp(avdiscard_entry->value, "nonref") == 0) - enc->skip_frame = AVDISCARD_NONREF; - } - if( AVMEDIA_TYPE_VIDEO == enc->codec_type && video_stream < 0) + if( AVMEDIA_TYPE_VIDEO == codec_type && video_stream < 0) { - CV_LOG_DEBUG(NULL, "FFMPEG: stream[" << i << "] is video stream with codecID=" << (int)enc->codec_id - << " width=" << enc->width - << " height=" << enc->height + // backup encoder' width/height +#ifndef CV_FFMPEG_CODECPAR + int enc_width = context->width; + int enc_height = context->height; +#else + int enc_width = par->width; + int enc_height = par->height; +#endif + + CV_LOG_DEBUG(NULL, "FFMPEG: stream[" << i << "] is video stream with codecID=" << (int)codec_id + << " width=" << enc_width + << " height=" << enc_height ); - // backup encoder' width/height - int enc_width = enc->width; - int enc_height = enc->height; #if !USE_AV_HW_CODECS va_type = VIDEO_ACCELERATION_NONE; #endif // find and open decoder, try HW acceleration types specified in 'hw_acceleration' list (in order) - AVCodec *codec = NULL; + const AVCodec *codec = NULL; err = -1; #if USE_AV_HW_CODECS HWAccelIterator accel_iter(va_type, false/*isEncoder*/, dict); @@ -1080,21 +1186,27 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& #if USE_AV_HW_CODECS accel_iter.parse_next(); AVHWDeviceType hw_type = accel_iter.hw_type(); - enc->get_format = avcodec_default_get_format; - if (enc->hw_device_ctx) { - av_buffer_unref(&enc->hw_device_ctx); - } if (hw_type != AV_HWDEVICE_TYPE_NONE) { CV_LOG_DEBUG(NULL, "FFMPEG: trying to configure H/W acceleration: '" << accel_iter.hw_type_device_string() << "'"); AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; - codec = hw_find_codec(enc->codec_id, hw_type, av_codec_is_decoder, accel_iter.disabled_codecs().c_str(), &hw_pix_fmt); - if (codec) { + codec = hw_find_codec(codec_id, hw_type, av_codec_is_decoder, accel_iter.disabled_codecs().c_str(), &hw_pix_fmt); + if (codec) + { +#ifdef CV_FFMPEG_CODECPAR + context = avcodec_alloc_context3(codec); +#endif + CV_Assert(context); + context->get_format = avcodec_default_get_format; + if (context->hw_device_ctx) { + av_buffer_unref(&context->hw_device_ctx); + } if (hw_pix_fmt != AV_PIX_FMT_NONE) - enc->get_format = hw_get_format_callback; // set callback to select HW pixel format, not SW format - enc->hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0); - if (!enc->hw_device_ctx) + context->get_format = hw_get_format_callback; // set callback to select HW pixel format, not SW format + context->hw_device_ctx = hw_create_device(hw_type, hw_device, accel_iter.device_subname(), use_opencl != 0); + if (!context->hw_device_ctx) { + context->get_format = avcodec_default_get_format; CV_LOG_DEBUG(NULL, "FFMPEG: ... can't create H/W device: '" << accel_iter.hw_type_device_string() << "'"); codec = NULL; } @@ -1106,10 +1218,10 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& AVDictionaryEntry* video_codec_param = av_dict_get(dict, "video_codec", NULL, 0); if (video_codec_param == NULL) { - codec = avcodec_find_decoder(enc->codec_id); + codec = avcodec_find_decoder(codec_id); if (!codec) { - CV_LOG_ERROR(NULL, "Could not find decoder for codec_id=" << (int)enc->codec_id); + CV_LOG_ERROR(NULL, "Could not find decoder for codec_id=" << (int)codec_id); } } else @@ -1121,10 +1233,26 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_ERROR(NULL, "Could not find decoder '" << video_codec_param->value << "'"); } } + if (codec) + { +#ifdef CV_FFMPEG_CODECPAR + context = avcodec_alloc_context3(codec); +#endif + CV_Assert(context); + } } if (!codec) + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#endif continue; - err = avcodec_open2(enc, codec, NULL); + } + fill_codec_context(context, dict); +#ifdef CV_FFMPEG_CODECPAR + avcodec_parameters_to_context(context, par); +#endif + err = avcodec_open2(context, codec, NULL); if (err >= 0) { #if USE_AV_HW_CODECS va_type = hw_type_to_va_type(hw_type); @@ -1146,10 +1274,10 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& } // checking width/height (since decoder can sometimes alter it, eg. vp6f) - if (enc_width && (enc->width != enc_width)) - enc->width = enc_width; - if (enc_height && (enc->height != enc_height)) - enc->height = enc_height; + if (enc_width && (context->width != enc_width)) + context->width = enc_width; + if (enc_height && (context->height != enc_height)) + context->height = enc_height; video_stream = i; video_st = ic->streams[i]; @@ -1160,8 +1288,8 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& picture = avcodec_alloc_frame(); #endif - frame.width = enc->width; - frame.height = enc->height; + frame.width = context->width; + frame.height = context->height; frame.cn = 3; frame.step = 0; frame.data = NULL; @@ -1306,7 +1434,7 @@ bool CvCapture_FFMPEG::grabFrame() int count_errs = 0; const int max_number_of_attempts = 1 << 9; - if( !ic || !video_st ) return false; + if( !ic || !video_st || !context ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1322,7 +1450,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(video_st->codec, picture) >= 0; + valid = avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1372,19 +1500,19 @@ bool CvCapture_FFMPEG::grabFrame() // Decode video frame #if USE_AV_SEND_FRAME_API - if (avcodec_send_packet(video_st->codec, &packet) < 0) { + if (avcodec_send_packet(context, &packet) < 0) { break; } - ret = avcodec_receive_frame(video_st->codec, picture); + ret = avcodec_receive_frame(context, picture); #else int got_picture = 0; - avcodec_decode_video2(video_st->codec, picture, &got_picture, &packet); + avcodec_decode_video2(context, picture, &got_picture, &packet); ret = got_picture ? 0 : -1; #endif if (ret >= 0) { //picture_pts = picture->best_effort_timestamp; if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->pkt_pts != AV_NOPTS_VALUE_ && picture->pkt_pts != 0 ? picture->pkt_pts : picture->pkt_dts; + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; valid = true; } else if (ret == AVERROR(EAGAIN)) { @@ -1415,7 +1543,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn) { - if (!video_st) + if (!video_st || !context) return false; if (rawMode || flag == extraDataIdx) @@ -1428,8 +1556,8 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, ret = p.data != NULL; } else if (flag == extraDataIdx) { - *data = ic->streams[video_stream]->codec->extradata; - *step = ic->streams[video_stream]->codec->extradata_size; + *data = ic->streams[video_stream]->CV_FFMPEG_CODEC_FIELD->extradata; + *step = ic->streams[video_stream]->CV_FFMPEG_CODEC_FIELD->extradata_size; } *width = *step; *height = 1; @@ -1454,13 +1582,13 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, return false; if( img_convert_ctx == NULL || - frame.width != video_st->codec->width || - frame.height != video_st->codec->height || + frame.width != video_st->CV_FFMPEG_CODEC_FIELD->width || + frame.height != video_st->CV_FFMPEG_CODEC_FIELD->height || frame.data == NULL ) { // Some sws_scale optimizations have some assumptions about alignment of data/step/width/height // Also we use coded_width/height to workaround problem with legacy ffmpeg versions (like n0.8) - int buffer_width = video_st->codec->coded_width, buffer_height = video_st->codec->coded_height; + int buffer_width = context->coded_width, buffer_height = context->coded_height; img_convert_ctx = sws_getCachedContext( img_convert_ctx, @@ -1494,8 +1622,8 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, _opencv_ffmpeg_av_image_fill_arrays(&rgb_picture, rgb_picture.data[0], AV_PIX_FMT_BGR24, buffer_width, buffer_height ); #endif - frame.width = video_st->codec->width; - frame.height = video_st->codec->height; + frame.width = video_st->CV_FFMPEG_CODEC_FIELD->width; + frame.height = video_st->CV_FFMPEG_CODEC_FIELD->height; frame.cn = 3; frame.data = rgb_picture.data[0]; frame.step = rgb_picture.linesize[0]; @@ -1505,7 +1633,7 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, img_convert_ctx, sw_picture->data, sw_picture->linesize, - 0, video_st->codec->coded_height, + 0, context->coded_height, rgb_picture.data, rgb_picture.linesize ); @@ -1519,7 +1647,7 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, #if USE_AV_HW_CODECS if (sw_picture != picture) { - av_frame_unref(sw_picture); + av_frame_free(&sw_picture); } #endif return true; @@ -1529,12 +1657,12 @@ bool CvCapture_FFMPEG::retrieveHWFrame(cv::OutputArray output) { #if USE_AV_HW_CODECS // check that we have HW frame in GPU memory - if (!picture || !picture->hw_frames_ctx) { + if (!picture || !picture->hw_frames_ctx || !context) { return false; } // GPU color conversion NV12->BGRA, from GPU media buffer to GPU OpenCL buffer - return hw_copy_frame_to_umat(video_st->codec->hw_device_ctx, picture, output); + return hw_copy_frame_to_umat(context->hw_device_ctx, picture, output); #else CV_UNUSED(output); return false; @@ -1543,7 +1671,7 @@ bool CvCapture_FFMPEG::retrieveHWFrame(cv::OutputArray output) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st ) return 0; + if( !video_st || !context ) return 0; double codec_tag = 0; CV_CODEC_ID codec_id = AV_CODEC_ID_NONE; @@ -1570,8 +1698,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const case CAP_PROP_FPS: return get_fps(); case CAP_PROP_FOURCC: - codec_id = video_st->codec->codec_id; - codec_tag = (double) video_st->codec->codec_tag; + codec_id = video_st->CV_FFMPEG_CODEC_FIELD->codec_id; + codec_tag = (double) video_st->CV_FFMPEG_CODEC_FIELD->codec_tag; if(codec_tag || codec_id == AV_CODEC_ID_NONE) { @@ -1591,7 +1719,11 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const return _opencv_ffmpeg_get_sample_aspect_ratio(ic->streams[video_stream]).den; case CAP_PROP_CODEC_PIXEL_FORMAT: { +#ifdef CV_FFMPEG_CODECPAR + AVPixelFormat pix_fmt = (AVPixelFormat)video_st->codecpar->format; +#else AVPixelFormat pix_fmt = video_st->codec->pix_fmt; +#endif unsigned int fourcc_tag = avcodec_pix_fmt_to_codec_tag(pix_fmt); return (fourcc_tag == 0) ? (double)-1 : (double)fourcc_tag; } @@ -1671,7 +1803,7 @@ double CvCapture_FFMPEG::get_fps() const if (fps < eps_zero) { - fps = 1.0 / r2d(ic->streams[video_stream]->codec->time_base); + fps = 1.0 / r2d(ic->streams[video_stream]->time_base); } #endif return fps; @@ -1703,7 +1835,16 @@ double CvCapture_FFMPEG::dts_to_sec(int64_t dts) const void CvCapture_FFMPEG::get_rotation_angle() { rotation_angle = 0; -#if LIBAVUTIL_BUILD >= CALC_FFMPEG_VERSION(52, 94, 100) +#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(57, 68, 100) + const uint8_t *data = 0; + data = av_stream_get_side_data(video_st, AV_PKT_DATA_DISPLAYMATRIX, NULL); + if (data) + { + rotation_angle = cvRound(av_display_rotation_get((const int32_t*)data)); + if (rotation_angle < 0) + rotation_angle += 360; + } +#elif LIBAVUTIL_BUILD >= CALC_FFMPEG_VERSION(52, 94, 100) AVDictionaryEntry *rotate_tag = av_dict_get(video_st->metadata, "rotate", NULL, 0); if (rotate_tag != NULL) rotation_angle = atoi(rotate_tag->value); @@ -1712,6 +1853,7 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { + CV_Assert(context); _frame_number = std::min(_frame_number, get_total_frames()); int delta = 16; @@ -1728,7 +1870,7 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(ic->streams[video_stream]->codec); + avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1833,7 +1975,7 @@ struct CvVideoWriter_FFMPEG void init(); - AVOutputFormat * fmt; + CV_FFMPEG_FMT_CONST AVOutputFormat * fmt; AVFormatContext * oc; uint8_t * outbuf; uint32_t outbuf_size; @@ -1842,6 +1984,7 @@ struct CvVideoWriter_FFMPEG AVFrame * input_picture; uint8_t * picbuf; AVStream * video_st; + AVCodecContext * context; AVPixelFormat input_pix_fmt; unsigned char * aligned_input; size_t aligned_input_size; @@ -1906,6 +2049,7 @@ void CvVideoWriter_FFMPEG::init() input_picture = 0; picbuf = 0; video_st = 0; + context = 0; input_pix_fmt = AV_PIX_FMT_NONE; aligned_input = NULL; aligned_input_size = 0; @@ -1957,23 +2101,32 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo } /* configure video stream */ -static bool icv_configure_video_stream_FFMPEG(AVFormatContext *oc, +static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc, AVStream *st, const AVCodec* codec, int w, int h, int bitrate, - double fps, AVPixelFormat pixel_format) + double fps, AVPixelFormat pixel_format, int fourcc) { +#ifdef CV_FFMPEG_CODECPAR + AVCodecContext *c = avcodec_alloc_context3(codec); +#else AVCodecContext *c = st->codec; +#endif + CV_Assert(c); + int frame_rate, frame_rate_base; c->codec_id = codec->id; c->codec_type = AVMEDIA_TYPE_VIDEO; + c->codec_tag = fourcc; +#ifndef CV_FFMPEG_CODECPAR // Set per-codec defaults CV_CODEC_ID c_id = c->codec_id; avcodec_get_context_defaults3(c, codec); // avcodec_get_context_defaults3 erases codec_id for some reason c->codec_id = c_id; +#endif /* put sample parameters */ int64_t lbit_rate = (int64_t)bitrate; @@ -2016,7 +2169,12 @@ static bool icv_configure_video_stream_FFMPEG(AVFormatContext *oc, } } if (best == NULL) - return false; + { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&c); +#endif + return NULL; + } c->time_base.den= best->num; c->time_base.num= best->den; } @@ -2059,26 +2217,20 @@ static bool icv_configure_video_stream_FFMPEG(AVFormatContext *oc, #endif } -#if defined(_MSC_VER) - AVRational avg_frame_rate = {frame_rate, frame_rate_base}; - st->avg_frame_rate = avg_frame_rate; -#else - st->avg_frame_rate = (AVRational){frame_rate, frame_rate_base}; -#endif + st->avg_frame_rate = av_make_q(frame_rate, frame_rate_base); #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(55, 20, 0) st->time_base = c->time_base; #endif - return true; + return c; } static const int OPENCV_NO_FRAMES_WRITTEN_CODE = 1000; -static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st, +static int icv_av_write_frame_FFMPEG( AVFormatContext * oc, AVStream * video_st, AVCodecContext * c, uint8_t *, uint32_t, AVFrame * picture, int frame_idx) { - AVCodecContext* c = video_st->codec; int ret = OPENCV_NO_FRAMES_WRITTEN_CODE; #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0) @@ -2176,9 +2328,6 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int width = frame_width; height = frame_height; - // typecast from opaque data type to implemented struct - AVCodecContext* c = video_st->codec; - // FFmpeg contains SIMD optimizations which can sometimes read data past // the supplied input buffer. // Related info: https://trac.ffmpeg.org/ticket/6763 @@ -2215,10 +2364,10 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int step = aligned_step; } - AVPixelFormat sw_pix_fmt = c->pix_fmt; + AVPixelFormat sw_pix_fmt = context->pix_fmt; #if USE_AV_HW_CODECS - if (c->hw_frames_ctx) - sw_pix_fmt = ((AVHWFramesContext*)c->hw_frames_ctx->data)->sw_format; + if (context->hw_frames_ctx) + sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format; #endif if ( sw_pix_fmt != input_pix_fmt ) { CV_Assert( input_picture ); @@ -2232,8 +2381,8 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int img_convert_ctx = sws_getContext(width, height, (AVPixelFormat)input_pix_fmt, - c->width, - c->height, + context->width, + context->height, sw_pix_fmt, SWS_BICUBIC, NULL, NULL, NULL); @@ -2255,14 +2404,14 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int bool ret; #if USE_AV_HW_CODECS - if (video_st->codec->hw_device_ctx) { + if (context->hw_device_ctx) { // copy data to HW frame AVFrame* hw_frame = av_frame_alloc(); if (!hw_frame) { CV_LOG_ERROR(NULL, "Error allocating AVFrame (av_frame_alloc)"); return false; } - if (av_hwframe_get_buffer(video_st->codec->hw_frames_ctx, hw_frame, 0) < 0) { + if (av_hwframe_get_buffer(context->hw_frames_ctx, hw_frame, 0) < 0) { CV_LOG_ERROR(NULL, "Error obtaining HW frame (av_hwframe_get_buffer)"); av_frame_free(&hw_frame); return false; @@ -2273,14 +2422,14 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int return false; } hw_frame->pts = frame_idx; - int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, outbuf, outbuf_size, hw_frame, frame_idx); + int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, context, outbuf, outbuf_size, hw_frame, frame_idx); ret = ret_write >= 0 ? true : false; av_frame_free(&hw_frame); } else #endif { picture->pts = frame_idx; - int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, outbuf, outbuf_size, picture, frame_idx); + int ret_write = icv_av_write_frame_FFMPEG(oc, video_st, context, outbuf, outbuf_size, picture, frame_idx); ret = ret_write >= 0 ? true : false; } @@ -2291,7 +2440,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int bool CvVideoWriter_FFMPEG::writeHWFrame(cv::InputArray input) { #if USE_AV_HW_CODECS - if (!video_st->codec->hw_frames_ctx) + if (!video_st || !context || !context->hw_frames_ctx || !context->hw_device_ctx) return false; // Get hardware frame from frame pool @@ -2299,20 +2448,20 @@ bool CvVideoWriter_FFMPEG::writeHWFrame(cv::InputArray input) { if (!hw_frame) { return false; } - if (av_hwframe_get_buffer(video_st->codec->hw_frames_ctx, hw_frame, 0) < 0) { + if (av_hwframe_get_buffer(context->hw_frames_ctx, hw_frame, 0) < 0) { av_frame_free(&hw_frame); return false; } // GPU to GPU copy - if (!hw_copy_umat_to_frame(video_st->codec->hw_device_ctx, input, hw_frame)) { + if (!hw_copy_umat_to_frame(context->hw_device_ctx, input, hw_frame)) { av_frame_free(&hw_frame); return false; } // encode hw_frame->pts = frame_idx; - icv_av_write_frame_FFMPEG( oc, video_st, outbuf, outbuf_size, hw_frame, frame_idx); + icv_av_write_frame_FFMPEG( oc, video_st, context, outbuf, outbuf_size, hw_frame, frame_idx); frame_idx++; av_frame_free(&hw_frame); @@ -2347,17 +2496,13 @@ double CvVideoWriter_FFMPEG::getProperty(int propId) const /// close video output stream and free associated memory void CvVideoWriter_FFMPEG::close() { - // nothing to do if already released - if ( !picture ) - return; - /* no more frame to compress. The codec has a latency of a few frames if using B frames, so we get the last frames by passing the same picture again */ // TODO -- do we need to account for latency here? /* write the trailer, if any */ - if(ok && oc) + if (picture && ok && oc) { #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0) if (!(oc->oformat->flags & AVFMT_RAWPICTURE)) @@ -2365,7 +2510,7 @@ void CvVideoWriter_FFMPEG::close() { for(;;) { - int ret = icv_av_write_frame_FFMPEG( oc, video_st, outbuf, outbuf_size, NULL, frame_idx); + int ret = icv_av_write_frame_FFMPEG( oc, video_st, context, outbuf, outbuf_size, NULL, frame_idx); if( ret == OPENCV_NO_FRAMES_WRITTEN_CODE || ret < 0 ) break; } @@ -2380,7 +2525,7 @@ void CvVideoWriter_FFMPEG::close() } // free pictures - if( video_st->codec->pix_fmt != input_pix_fmt) + if (picture && context && context->pix_fmt != input_pix_fmt) { if(picture->data[0]) free(picture->data[0]); @@ -2391,8 +2536,14 @@ void CvVideoWriter_FFMPEG::close() if (input_picture) av_free(input_picture); +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#else /* close codec */ - avcodec_close(video_st->codec); + if (context) // fixed after https://github.com/FFmpeg/FFmpeg/commit/3e1f507f3e8f16b716aa115552d243b48ae809bd + avcodec_close(context); + context = NULL; +#endif av_free(outbuf); @@ -2599,8 +2750,15 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, /* set file name */ oc->oformat = fmt; +#ifndef CV_FFMPEG_URL snprintf(oc->filename, sizeof(oc->filename), "%s", filename); - +#else + size_t name_len = strlen(filename); + oc->url = (char*)av_malloc(name_len + 1); + CV_Assert(oc->url); + memcpy((void*)oc->url, filename, name_len + 1); + oc->url[name_len] = '\0'; +#endif /* set some options */ oc->max_delay = (int)(0.7*AV_TIME_BASE); /* This reduces buffer underrun warnings with MPEG */ @@ -2715,7 +2873,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, double bitrate = std::min(bitrate_scale*fps*width*height, (double)INT_MAX/2); if (codec_id == AV_CODEC_ID_NONE) { - codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, AVMEDIA_TYPE_VIDEO); + codec_id = av_guess_codec(oc->oformat, NULL, filename, NULL, AVMEDIA_TYPE_VIDEO); } // Add video stream to output file @@ -2733,11 +2891,9 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, } #endif - AVCodecContext *c = video_st->codec; - // find and open encoder, try HW acceleration types specified in 'hw_acceleration' list (in order) int err = -1; - AVCodec* codec = NULL; + const AVCodec* codec = NULL; #if USE_AV_HW_CODECS AVBufferRef* hw_device_ctx = NULL; HWAccelIterator accel_iter(va_type, true/*isEncoder*/, dict); @@ -2780,9 +2936,14 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, AVPixelFormat format = codec_pix_fmt; #endif - if (!icv_configure_video_stream_FFMPEG(oc, video_st, codec, - width, height, (int) (bitrate + 0.5), - fps, format)) { +#ifdef CV_FFMPEG_CODECPAR + avcodec_free_context(&context); +#endif + context = icv_configure_video_stream_FFMPEG(oc, video_st, codec, + width, height, (int) (bitrate + 0.5), + fps, format, fourcc); + if (!context) + { continue; } @@ -2794,27 +2955,25 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, #endif #endif - c->codec_tag = fourcc; - #if USE_AV_HW_CODECS if (hw_device_ctx) { - c->hw_device_ctx = av_buffer_ref(hw_device_ctx); + context->hw_device_ctx = av_buffer_ref(hw_device_ctx); if (hw_format != AV_PIX_FMT_NONE) { - c->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format); - if (!c->hw_frames_ctx) + context->hw_frames_ctx = hw_create_frames(NULL, hw_device_ctx, width, height, hw_format); + if (!context->hw_frames_ctx) continue; } } #endif - int64_t lbit_rate = (int64_t) c->bit_rate; + int64_t lbit_rate = (int64_t) context->bit_rate; lbit_rate += (int64_t)(bitrate / 2); lbit_rate = std::min(lbit_rate, (int64_t) INT_MAX); - c->bit_rate_tolerance = (int) lbit_rate; - c->bit_rate = (int) lbit_rate; + context->bit_rate_tolerance = (int) lbit_rate; + context->bit_rate = (int) lbit_rate; /* open the codec */ - err = avcodec_open2(c, codec, NULL); + err = avcodec_open2(context, codec, NULL); if (err >= 0) { #if USE_AV_HW_CODECS va_type = hw_type_to_va_type(hw_type); @@ -2823,7 +2982,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, #endif break; } else { - CV_LOG_ERROR(NULL, "Could not open codec " << codec->name << ", error: " << icvFFMPEGErrStr(err)); + CV_LOG_ERROR(NULL, "Could not open codec " << codec->name << ", error: " << icvFFMPEGErrStr(err) << " (" << err << ")"); } #if USE_AV_HW_CODECS } // while (accel_iter.good()) @@ -2844,6 +3003,12 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, return false; } +#ifdef CV_FFMPEG_CODECPAR + // Copy all to codecpar... + // !!! https://stackoverflow.com/questions/15897849/c-ffmpeg-not-writing-avcc-box-information + avcodec_parameters_from_context(video_st->codecpar, context); +#endif + outbuf = NULL; @@ -2858,16 +3023,16 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, } bool need_color_convert; - AVPixelFormat sw_pix_fmt = c->pix_fmt; + AVPixelFormat sw_pix_fmt = context->pix_fmt; #if USE_AV_HW_CODECS - if (c->hw_frames_ctx) - sw_pix_fmt = ((AVHWFramesContext*)c->hw_frames_ctx->data)->sw_format; + if (context->hw_frames_ctx) + sw_pix_fmt = ((AVHWFramesContext*)context->hw_frames_ctx->data)->sw_format; #endif need_color_convert = (sw_pix_fmt != input_pix_fmt); /* allocate the encoded raw picture */ - picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, c->width, c->height, need_color_convert); + picture = icv_alloc_picture_FFMPEG(sw_pix_fmt, context->width, context->height, need_color_convert); if (!picture) { return false; } @@ -2877,7 +3042,7 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, to the required output format */ input_picture = NULL; if ( need_color_convert ) { - input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, c->width, c->height, false); + input_picture = icv_alloc_picture_FFMPEG(input_pix_fmt, context->width, context->height, false); if (!input_picture) { return false; } diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 87e25bbd3d..40ef586fd6 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -235,8 +235,8 @@ static void generateFrame(Mat &frame, unsigned int i, const Point ¢er, const frame = Scalar::all(i % 255); stringstream buf(ios::out); buf << "frame #" << i; - putText(frame, buf.str(), Point(50, center.y), FONT_HERSHEY_SIMPLEX, 5.0, color, 5, CV_AA); - circle(frame, center, i + 2, color, 2, CV_AA); + putText(frame, buf.str(), Point(50, center.y), FONT_HERSHEY_SIMPLEX, 5.0, color, 5, LINE_AA); + circle(frame, center, i + 2, color, 2, LINE_AA); } TEST(videoio_ffmpeg, parallel) @@ -536,5 +536,17 @@ TEST(videoio_ffmpeg, create_with_property_badarg) EXPECT_FALSE(cap.isOpened()); } +// related issue: https://github.com/opencv/opencv/issues/16821 +TEST(videoio_ffmpeg, DISABLED_open_from_web) +{ + if (!videoio_registry::hasBackend(CAP_FFMPEG)) + throw SkipTestException("FFmpeg backend was not found"); + + string video_file = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4"; + VideoCapture cap(video_file, CAP_FFMPEG); + int n_frames = -1; + EXPECT_NO_THROW(n_frames = (int)cap.get(CAP_PROP_FRAME_COUNT)); + EXPECT_EQ((int)14315, n_frames); +} }} // namespace diff --git a/modules/videoio/test/test_precomp.hpp b/modules/videoio/test/test_precomp.hpp index 5bc2ccdf95..cffdf2bef4 100644 --- a/modules/videoio/test/test_precomp.hpp +++ b/modules/videoio/test/test_precomp.hpp @@ -9,8 +9,6 @@ #include "opencv2/ts.hpp" #include "opencv2/videoio.hpp" #include "opencv2/videoio/registry.hpp" -#include "opencv2/imgproc/imgproc_c.h" - #include "opencv2/core/private.hpp" namespace cv { diff --git a/modules/videoio/test/test_video_io.cpp b/modules/videoio/test/test_video_io.cpp index 9545ae4ec2..6486f2d98c 100644 --- a/modules/videoio/test/test_video_io.cpp +++ b/modules/videoio/test/test_video_io.cpp @@ -1,47 +1,8 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #include "test_precomp.hpp" -#include "opencv2/videoio/videoio_c.h" namespace opencv_test { diff --git a/platforms/linux/riscv64-clang.toolchain.cmake b/platforms/linux/riscv64-clang.toolchain.cmake index f19c244f7b..62d9e293d2 100644 --- a/platforms/linux/riscv64-clang.toolchain.cmake +++ b/platforms/linux/riscv64-clang.toolchain.cmake @@ -17,8 +17,8 @@ set(CMAKE_ASM_COMPILER_TARGET ${CLANG_TARGET_TRIPLE}) # Don't run the linker on compiler check set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -set(CMAKE_C_FLAGS "-march=rv64gcv0p10 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-march=rv64gcv0p10 -menable-experimental-extensions --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") +set(CMAKE_C_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "-march=rv64gcv --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") diff --git a/samples/dnn/speech_recognition.cpp b/samples/dnn/speech_recognition.cpp new file mode 100644 index 0000000000..ff461c50f5 --- /dev/null +++ b/samples/dnn/speech_recognition.cpp @@ -0,0 +1,587 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace cv; +using namespace std; + +class FilterbankFeatures { + +// Initializes pre-processing class. Default values are the values used by the Jasper +// architecture for pre-processing. For more details, refer to the paper here: +// https://arxiv.org/abs/1904.03288 + +private: + int sample_rate = 16000; + double window_size = 0.02; + double window_stride = 0.01; + int win_length = static_cast(sample_rate * window_size); // Number of samples in window + int hop_length = static_cast(sample_rate * window_stride); // Number of steps to advance between frames + int n_fft = 512; // Size of window for STFT + + // Parameters for filterbanks calculation + int n_filt = 64; + double lowfreq = 0.; + double highfreq = sample_rate / 2; + +public: + // Mel filterbanks preparation + double hz_to_mel(double frequencies) + { + //Converts frequencies from hz to mel scale + // Fill in the linear scale + double f_min = 0.0; + double f_sp = 200.0 / 3; + double mels = (frequencies - f_min) / f_sp; + // Fill in the log-scale part + double min_log_hz = 1000.0; // beginning of log region (Hz) + double min_log_mel = (min_log_hz - f_min) / f_sp; // same (Mels) + double logstep = std::log(6.4) / 27.0; // step size for log region + + if (frequencies >= min_log_hz) + { + mels = min_log_mel + std::log(frequencies / min_log_hz) / logstep; + } + return mels; + } + + vector mel_to_hz(vector& mels) + { + // Converts frequencies from mel to hz scale + + // Fill in the linear scale + double f_min = 0.0; + double f_sp = 200.0 / 3; + vector freqs; + for (size_t i = 0; i < mels.size(); i++) + { + freqs.push_back(f_min + f_sp * mels[i]); + } + + // And now the nonlinear scale + double min_log_hz = 1000.0; // beginning of log region (Hz) + double min_log_mel = (min_log_hz - f_min) / f_sp; // same (Mels) + double logstep = std::log(6.4) / 27.0; // step size for log region + + for(size_t i = 0; i < mels.size(); i++) + { + if (mels[i] >= min_log_mel) + { + freqs[i] = min_log_hz * exp(logstep * (mels[i] - min_log_mel)); + } + } + return freqs; + } + + vector mel_frequencies(int n_mels, double fmin, double fmax) + { + // Calculates n mel frequencies between 2 frequencies + double min_mel = hz_to_mel(fmin); + double max_mel = hz_to_mel(fmax); + + vector mels; + double step = (max_mel - min_mel) / (n_mels - 1); + for(double i = min_mel; i < max_mel; i += step) + { + mels.push_back(i); + } + mels.push_back(max_mel); + + vector res = mel_to_hz(mels); + return res; + } + + vector> mel(int n_mels, double fmin, double fmax) + { + // Generates mel filterbank matrix + + double num = 1 + n_fft / 2; + vector> weights(n_mels, vector(static_cast(num), 0.)); + + // Center freqs of each FFT bin + vector fftfreqs; + double step = (sample_rate / 2) / (num - 1); + for(double i = 0; i <= sample_rate / 2; i += step) + { + fftfreqs.push_back(i); + } + // 'Center freqs' of mel bands - uniformly spaced between limits + vector mel_f = mel_frequencies(n_mels + 2, fmin, fmax); + + vector fdiff; + for(size_t i = 1; i < mel_f.size(); ++i) + { + fdiff.push_back(mel_f[i]- mel_f[i - 1]); + } + + vector> ramps(mel_f.size(), vector(fftfreqs.size())); + for (size_t i = 0; i < mel_f.size(); ++i) + { + for (size_t j = 0; j < fftfreqs.size(); ++j) + { + ramps[i][j] = mel_f[i] - fftfreqs[j]; + } + } + + double lower, upper, enorm; + for (int i = 0; i < n_mels; ++i) + { + // using Slaney-style mel which is scaled to be approx constant energy per channel + enorm = 2./(mel_f[i + 2] - mel_f[i]); + + for (int j = 0; j < static_cast(num); ++j) + { + // lower and upper slopes for all bins + lower = (-1) * ramps[i][j] / fdiff[i]; + upper = ramps[i + 2][j] / fdiff[i + 1]; + + weights[i][j] = max(0., min(lower, upper)) * enorm; + } + } + return weights; + } + + // STFT preparation + vector pad_window_center(vector&data, int size) + { + // Pad the window out to n_fft size + int n = static_cast(data.size()); + int lpad = static_cast((size - n) / 2); + vector pad_array; + + for(int i = 0; i < lpad; ++i) + { + pad_array.push_back(0.); + } + + for(size_t i = 0; i < data.size(); ++i) + { + pad_array.push_back(data[i]); + } + + for(int i = 0; i < lpad; ++i) + { + pad_array.push_back(0.); + } + return pad_array; + } + + vector> frame(vector& x) + { + // Slices a data array into overlapping frames. + int n_frames = static_cast(1 + (x.size() - n_fft) / hop_length); + vector> new_x(n_fft, vector(n_frames)); + + for (int i = 0; i < n_fft; ++i) + { + for (int j = 0; j < n_frames; ++j) + { + new_x[i][j] = x[i + j * hop_length]; + } + } + return new_x; + } + + vector hanning() + { + // https://en.wikipedia.org/wiki/Window_function#Hann_and_Hamming_windows + vector window_tensor; + for (int j = 1 - win_length; j < win_length; j+=2) + { + window_tensor.push_back(1 - (0.5 * (1 - cos(CV_PI * j / (win_length - 1))))); + } + return window_tensor; + } + + vector> stft_power(vector& y) + { + // Short Time Fourier Transform. The STFT represents a signal in the time-frequency + // domain by computing discrete Fourier transforms (DFT) over short overlapping windows. + // https://en.wikipedia.org/wiki/Short-time_Fourier_transform + + // Pad the time series so that frames are centered + vector new_y; + int num = int(n_fft / 2); + + for (int i = 0; i < num; ++i) + { + new_y.push_back(y[num - i]); + } + for (size_t i = 0; i < y.size(); ++i) + { + new_y.push_back(y[i]); + } + for (size_t i = y.size() - 2; i >= y.size() - num - 1; --i) + { + new_y.push_back(y[i]); + } + + // Compute a window function + vector window_tensor = hanning(); + + // Pad the window out to n_fft size + vector fft_window = pad_window_center(window_tensor, n_fft); + + // Window the time series + vector> y_frames = frame(new_y); + + // Multiply on fft_window + for (size_t i = 0; i < y_frames.size(); ++i) + { + for (size_t j = 0; j < y_frames[0].size(); ++j) + { + y_frames[i][j] *= fft_window[i]; + } + } + + // Transpose frames for computing stft + vector> y_frames_transpose(y_frames[0].size(), vector(y_frames.size())); + for (size_t i = 0; i < y_frames[0].size(); ++i) + { + for (size_t j = 0; j < y_frames.size(); ++j) + { + y_frames_transpose[i][j] = y_frames[j][i]; + } + } + + // Short Time Fourier Transform + // and get power of spectrum + vector> spectrum_power(y_frames_transpose[0].size() / 2 + 1 ); + for (size_t i = 0; i < y_frames_transpose.size(); ++i) + { + Mat dstMat; + dft(y_frames_transpose[i], dstMat, DFT_COMPLEX_OUTPUT); + + // we need only the first part of the spectrum, the second part is symmetrical + for (int j = 0; j < static_cast(y_frames_transpose[0].size()) / 2 + 1; ++j) + { + double power_re = dstMat.at(2 * j) * dstMat.at(2 * j); + double power_im = dstMat.at(2 * j + 1) * dstMat.at(2 * j + 1); + spectrum_power[j].push_back(power_re + power_im); + } + } + return spectrum_power; + } + + Mat calculate_features(vector& x) + { + // Calculates filterbank features matrix. + + // Do preemphasis + std::default_random_engine generator; + std::normal_distribution normal_distr(0, 1); + double dither = 1e-5; + for(size_t i = 0; i < x.size(); ++i) + { + x[i] += dither * static_cast(normal_distr(generator)); + } + double preemph = 0.97; + for (size_t i = x.size() - 1; i > 0; --i) + { + x[i] -= preemph * x[i-1]; + } + + // Calculate Short Time Fourier Transform and get power of spectrum + auto spectrum_power = stft_power(x); + + vector> filterbanks = mel(n_filt, lowfreq, highfreq); + + // Calculate log of multiplication of filterbanks matrix on spectrum_power matrix + vector> x_stft(filterbanks.size(), vector(spectrum_power[0].size(), 0)); + + for (size_t i = 0; i < filterbanks.size(); ++i) + { + for (size_t j = 0; j < filterbanks[0].size(); ++j) + { + for (size_t k = 0; k < spectrum_power[0].size(); ++k) + { + x_stft[i][k] += filterbanks[i][j] * spectrum_power[j][k]; + } + } + for (size_t k = 0; k < spectrum_power[0].size(); ++k) + { + x_stft[i][k] = std::log(x_stft[i][k] + 1e-20); + } + } + + // normalize data + auto elments_num = x_stft[0].size(); + for(size_t i = 0; i < x_stft.size(); ++i) + { + double x_mean = std::accumulate(x_stft[i].begin(), x_stft[i].end(), 0.) / elments_num; // arithmetic mean + double x_std = 0; // standard deviation + for(size_t j = 0; j < elments_num; ++j) + { + double subtract = x_stft[i][j] - x_mean; + x_std += subtract * subtract; + } + x_std /= elments_num; + x_std = sqrt(x_std) + 1e-10; // make sure x_std is not zero + + for(size_t j = 0; j < elments_num; ++j) + { + x_stft[i][j] = (x_stft[i][j] - x_mean) / x_std; // standard score + } + } + + Mat calculate_features(static_cast(x_stft.size()), static_cast(x_stft[0].size()), CV_32F); + for(int i = 0; i < calculate_features.size[0]; ++i) + { + for(int j = 0; j < calculate_features.size[1]; ++j) + { + calculate_features.at(i, j) = static_cast(x_stft[i][j]); + } + } + return calculate_features; + } +}; + +class Decoder { + // Used for decoding the output of jasper model +private: + unordered_map labels_map = fillMap(); + int blank_id = 28; + +public: + unordered_map fillMap() + { + vector labels={' ','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p' + ,'q','r','s','t','u','v','w','x','y','z','\''}; + unordered_map map; + for(int i = 0; i < static_cast(labels.size()); ++i) + { + map[i] = labels[i]; + } + return map; + } + + string decode(Mat& x) + { + // Takes output of Jasper model and performs ctc decoding algorithm to + // remove duplicates and special symbol. Returns prediction + + vector prediction; + for(int i = 0; i < x.size[1]; ++i) + { + double maxEl = -1e10; + int ind = 0; + for(int j = 0; j < x.size[2]; ++j) + { + if (maxEl <= x.at(0, i, j)) + { + maxEl = x.at(0, i, j); + ind = j; + } + } + prediction.push_back(ind); + } + // CTC decoding procedure + vector decoded_prediction = {}; + int previous = blank_id; + + for(int i = 0; i < static_cast(prediction.size()); ++i) + { + if (( prediction[i] != previous || previous == blank_id) && prediction[i] != blank_id) + { + decoded_prediction.push_back(prediction[i]); + } + previous = prediction[i]; + } + + string hypotheses = {}; + for(size_t i = 0; i < decoded_prediction.size(); ++i) + { + auto it = labels_map.find(static_cast(decoded_prediction[i])); + if (it != labels_map.end()) + hypotheses.push_back(it->second); + } + return hypotheses; + } + +}; + +static string predict(Mat& features, dnn::Net net, Decoder decoder) +{ + // Passes the features through the Jasper model and decodes the output to english transcripts. + + // expand 2d features matrix to 3d + vector sizes = {1, static_cast(features.size[0]), + static_cast(features.size[1])}; + features = features.reshape(0, sizes); + + // make prediction + net.setInput(features); + Mat output = net.forward(); + + // decode output to transcript + auto prediction = decoder.decode(output); + return prediction; +} + +static int readAudioFile(vector& inputAudio, string file, int audioStream) +{ + VideoCapture cap; + int samplingRate = 16000; + vector params { CAP_PROP_AUDIO_STREAM, audioStream, + CAP_PROP_VIDEO_STREAM, -1, + CAP_PROP_AUDIO_DATA_DEPTH, CV_32F, + CAP_PROP_AUDIO_SAMPLES_PER_SECOND, samplingRate + }; + cap.open(file, CAP_ANY, params); + if (!cap.isOpened()) + { + cerr << "Error : Can't read audio file: '" << file << "' with audioStream = " << audioStream << endl; + return -1; + } + const int audioBaseIndex = (int)cap.get(CAP_PROP_AUDIO_BASE_INDEX); + vector frameVec; + Mat frame; + for (;;) + { + if (cap.grab()) + { + cap.retrieve(frame, audioBaseIndex); + frameVec = frame; + inputAudio.insert(inputAudio.end(), frameVec.begin(), frameVec.end()); + } + else + { + break; + } + } + return samplingRate; +} + +static int readAudioMicrophone(vector& inputAudio, int microTime) +{ + VideoCapture cap; + int samplingRate = 16000; + vector params { CAP_PROP_AUDIO_STREAM, 0, + CAP_PROP_VIDEO_STREAM, -1, + CAP_PROP_AUDIO_DATA_DEPTH, CV_32F, + CAP_PROP_AUDIO_SAMPLES_PER_SECOND, samplingRate + }; + cap.open(0, CAP_ANY, params); + if (!cap.isOpened()) + { + cerr << "Error: Can't open microphone" << endl; + return -1; + } + + const int audioBaseIndex = (int)cap.get(CAP_PROP_AUDIO_BASE_INDEX); + vector frameVec; + Mat frame; + if (microTime <= 0) + { + cerr << "Error: Duration of audio chunk must be > 0" << endl; + return -1; + } + size_t sizeOfData = static_cast(microTime * samplingRate); + while (inputAudio.size() < sizeOfData) + { + if (cap.grab()) + { + cap.retrieve(frame, audioBaseIndex); + frameVec = frame; + inputAudio.insert(inputAudio.end(), frameVec.begin(), frameVec.end()); + } + else + { + cerr << "Error: Grab error" << endl; + break; + } + } + return samplingRate; +} + +int main(int argc, char** argv) +{ + const String keys = + "{help h usage ? | | This script runs Jasper Speech recognition model }" + "{input_file i | | Path to input audio file. If not specified, microphone input will be used }" + "{audio_duration t | 15 | Duration of audio chunk to be captured from microphone }" + "{audio_stream a | 0 | CAP_PROP_AUDIO_STREAM value }" + "{show_spectrogram s | false | Show a spectrogram of the input audio: true / false / 1 / 0 }" + "{model m | jasper.onnx | Path to the onnx file of Jasper. You can download the converted onnx model " + "from https://drive.google.com/drive/folders/1wLtxyao4ItAg8tt4Sb63zt6qXzhcQoR6?usp=sharing}" + "{backend b | dnn::DNN_BACKEND_DEFAULT | Select a computation backend: " + "dnn::DNN_BACKEND_DEFAULT, " + "dnn::DNN_BACKEND_INFERENCE_ENGINE, " + "dnn::DNN_BACKEND_OPENCV }" + "{target t | dnn::DNN_TARGET_CPU | Select a target device: " + "dnn::DNN_TARGET_CPU, " + "dnn::DNN_TARGET_OPENCL, " + "dnn::DNN_TARGET_OPENCL_FP16 }" + ; + CommandLineParser parser(argc, argv, keys); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + // Load Network + dnn::Net net = dnn::readNetFromONNX(parser.get("model")); + net.setPreferableBackend(parser.get("backend")); + net.setPreferableTarget(parser.get("target")); + + // Get audio + vectorinputAudio = {}; + int samplingRate = 0; + if (parser.has("input_file")) + { + string audio = samples::findFile(parser.get("input_file")); + samplingRate = readAudioFile(inputAudio, audio, parser.get("audio_stream")); + } + else + { + samplingRate = readAudioMicrophone(inputAudio, parser.get("audio_duration")); + } + + if ((inputAudio.size() == 0) || samplingRate <= 0) + { + cerr << "Error: problems with audio reading, check input arguments" << endl; + return -1; + } + + if (inputAudio.size() / samplingRate < 6) + { + cout << "Warning: For predictable network performance duration of audio must exceed 6 sec." + " Audio will be extended with zero samples" << endl; + for(int i = static_cast(inputAudio.size()) - 1; i < samplingRate * 6; ++i) + { + inputAudio.push_back(0); + } + } + + // Calculate features + FilterbankFeatures filter; + auto calculated_features = filter.calculate_features(inputAudio); + + // Show spectogram if required + if (parser.get("show_spectrogram") == true) + { + Mat spectogram; + normalize(calculated_features, spectogram, 0, 255, NORM_MINMAX, CV_8U); + applyColorMap(spectogram, spectogram, COLORMAP_INFERNO); + imshow("spectogram", spectogram); + waitKey(0); + } + + Decoder decoder; + string prediction = predict(calculated_features, net, decoder); + for( auto &transcript: prediction) + { + cout << transcript; + } + + return 0; +} diff --git a/samples/dnn/speech_recognition.py b/samples/dnn/speech_recognition.py index 7bc424b37c..da2ce11521 100644 --- a/samples/dnn/speech_recognition.py +++ b/samples/dnn/speech_recognition.py @@ -44,7 +44,7 @@ import os model.graph.initializer.insert(i,init) ``` - 6. Add an additional reshape node to handle the inconsistant input from python and c++ of openCV. + 6. Add an additional reshape node to handle the inconsistent input from python and c++ of openCV. see https://github.com/opencv/opencv/issues/19091 Make & insert a new node with 'Reshape' operation & required initializer ``` @@ -256,7 +256,7 @@ class FilterbankFeatures: weights *= enorm[:, np.newaxis] return weights - # STFT preperation + # STFT preparation def pad_window_center(self, data, size, axis=-1, **kwargs): ''' Centers the data and pads. @@ -329,7 +329,7 @@ class FilterbankFeatures: then padded with zeros to match n_fft fft_window : a vector or array of length `n_fft` having values computed by a window function - pad_mode : mode while padding the singnal + pad_mode : mode while padding the signal return_complex : returns array with complex data type if `True` return : Matrix of short-term Fourier transform coefficients. ''' diff --git a/samples/gdb/mat_pretty_printer.py b/samples/gdb/mat_pretty_printer.py index e6ad2cbde2..54afd5e1d3 100644 --- a/samples/gdb/mat_pretty_printer.py +++ b/samples/gdb/mat_pretty_printer.py @@ -122,28 +122,38 @@ class Mat: (dtype, ctype) = flags.dtype() elsize = np.dtype(dtype).itemsize - ptr = m['data'] - dataptr = int(ptr) - length = (int(m['dataend']) - dataptr) // elsize - start = (int(m['datastart']) - dataptr) // elsize + shape = size.to_numpy() + steps = np.asarray([int(m['step']['p'][i]) for i in range(len(shape))], dtype=np.int64) - if length == 0: + ptr = m['data'] + # either we are default-constructed or sizes are zero + if int(ptr) == 0 or np.prod(shape * steps) == 0: self.mat = np.array([]) self.view = self.mat return + # we don't want to show excess brackets + if flags.channels() != 1: + shape = np.append(shape, flags.channels()) + steps = np.append(steps, elsize) + + # get the length of contiguous array from data to the last element of the matrix + length = 1 + np.sum((shape - 1) * steps) // elsize + if dtype != np.float16: + # read all elements into self.mat ctype = gdb.lookup_type(ctype) ptr = ptr.cast(ctype.array(length - 1).pointer()).dereference() self.mat = np.array([ptr[i] for i in range(length)], dtype=dtype) else: + # read as uint16_t and then reinterpret the bytes as float16 u16 = gdb.lookup_type('uint16_t') ptr = ptr.cast(u16.array(length - 1).pointer()).dereference() self.mat = np.array([ptr[i] for i in range(length)], dtype=np.uint16) self.mat = self.mat.view(np.float16) - steps = np.asarray([int(m['step']['p'][i]) for i in range(size.dims())], dtype=np.int64) - self.view = np.lib.stride_tricks.as_strided(self.mat[start:], shape=size.to_numpy(), strides=steps) + # numpy will do the heavy lifting of strided access + self.view = np.lib.stride_tricks.as_strided(self.mat, shape=shape, strides=steps) def __iter__(self): return iter({'data': stri(self.view)}.items()) diff --git a/samples/python/stitching_detailed.py b/samples/python/stitching_detailed.py index 3e0ec50acd..4371745994 100644 --- a/samples/python/stitching_detailed.py +++ b/samples/python/stitching_detailed.py @@ -324,7 +324,10 @@ def main(): is_work_scale_set = True img = cv.resize(src=full_img, dsize=None, fx=work_scale, fy=work_scale, interpolation=cv.INTER_LINEAR_EXACT) if is_seam_scale_set is False: - seam_scale = min(1.0, np.sqrt(seam_megapix * 1e6 / (full_img.shape[0] * full_img.shape[1]))) + if seam_megapix > 0: + seam_scale = min(1.0, np.sqrt(seam_megapix * 1e6 / (full_img.shape[0] * full_img.shape[1]))) + else: + seam_scale = 1.0 seam_work_aspect = seam_scale / work_scale is_seam_scale_set = True img_feat = cv.detail.computeImageFeatures2(finder, img) @@ -345,9 +348,9 @@ def main(): img_names_subset = [] full_img_sizes_subset = [] for i in range(len(indices)): - img_names_subset.append(img_names[indices[i, 0]]) - img_subset.append(images[indices[i, 0]]) - full_img_sizes_subset.append(full_img_sizes[indices[i, 0]]) + img_names_subset.append(img_names[indices[i]]) + img_subset.append(images[indices[i]]) + full_img_sizes_subset.append(full_img_sizes[indices[i]]) images = img_subset img_names = img_names_subset full_img_sizes = full_img_sizes_subset @@ -479,7 +482,7 @@ def main(): blender = cv.detail.Blender_createDefault(cv.detail.Blender_NO) elif blend_type == "multiband": blender = cv.detail_MultiBandBlender() - blender.setNumBands((np.log(blend_width) / np.log(2.) - 1.).astype(np.int)) + blender.setNumBands((np.log(blend_width) / np.log(2.) - 1.).astype(np.int32)) elif blend_type == "feather": blender = cv.detail_FeatherBlender() blender.setSharpness(1. / blend_width) @@ -513,6 +516,5 @@ def main(): if __name__ == '__main__': - print(__doc__) main() cv.destroyAllWindows()